# pip install tensorflow
# conda install pandas
# conda install seaborn
# conda install scikit-learn
# conda install plotly
# pip install --upgrade nbformat
# conda install Jinja2
# pip install keras-cv
# pip install tensorflow_datasets
| Label | Description |
|---|---|
| 0 | T-shirt/Top |
| 1 | Trouser |
| 2 | Pullover |
| 3 | Dress |
| 4 | Coat |
| 5 | Sandal |
| 6 | Shirt |
| 7 | Sneaker |
| 8 | Bag |
| 9 | Ankle Boot |
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from keras.datasets import fashion_mnist
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D,Flatten,MaxPooling2D,Dense,Flatten,Dropout,BatchNormalization,LeakyReLU
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.utils import to_categorical
from keras.models import Sequential
from keras.callbacks import EarlyStopping,ModelCheckpoint,ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from keras import regularizers
from sklearn.model_selection import train_test_split
import keras_cv
from numpy import mean
from numpy import std
from matplotlib import pyplot
from keras.optimizers import SGD
import gc
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import precision_score
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train , X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=13, stratify=y_train)
# we have 80% train,10% test,10% validation
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_val = X_val.reshape(-1, 784)
(48000, 28, 28) (10000, 28, 28) (48000,) (10000,)
X_train_df = pd.DataFrame(X_train.reshape(-1, 784))
y_train_df = pd.DataFrame(y_train)
X_train_df
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 69 | 85 | 19 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 223 | 154 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 47995 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 87 | 217 | 184 | 0 | 0 | 0 | 0 |
| 47996 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 60 | 50 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| 47997 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 47998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 47999 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | ... | 0 | 0 | 0 | 4 | 131 | 125 | 55 | 0 | 0 | 0 |
48000 rows × 784 columns
Dividing each of the pixels by 255 will normalize the pixels between 0 to 1 We normalize the pixels so that it can increase the speed of the learning process Neural Network processes inputs uses small weights values. Large inputs can disrupt or slow down learning process. It is good that we normalize the pixels.
print(X_train.min(), X_train.max())
# it seems like the data set provided by tensorflow is already between 0 to 255 . We can normalize it by dividing by 255 or so called pixel normalization
X_train = X_train / 255.0
X_test = X_test / 255.0
X_val = X_val / 255.0
0 255
number of data points for each class is exactly the same. This is so that there will not be biases for each class
types = y_train_df[0].unique()
types.sort()
print(types)
print(y_train_df[0].value_counts())
# y_ train is just an array of numbers from 0 to 9 that represent the class of the image
[0 1 2 3 4 5 6 7 8 9] 5 4800 4 4800 3 4800 7 4800 2 4800 9 4800 8 4800 0 4800 1 4800 6 4800 Name: 0, dtype: int64
class_names = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
print(class_names)
['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
array_of_each_class_position = [1,21,5,20,24,9,18,6,23,0]
# plot all images in array_of_each_class_position
plt.figure(figsize=(10,10))
for i in range(len(array_of_each_class_position)):
plt.subplot(5,2,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(X_train_df.iloc[array_of_each_class_position[i]].values.reshape(28,28), cmap=plt.cm.binary)
plt.xlabel(class_names[y_train_df.iloc[array_of_each_class_position[i]][0]])
plt.show()
y_test_label = y_test
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_val = to_categorical(y_val)
X_train: uint8 NumPy array of grayscale image data with shapes (, 28, 28), containing the training data.
y_train: uint8 NumPy array of labels (integers in range 0-9) with shape (60000,) for the training data.
X_test: uint8 NumPy array of grayscale image data with shapes (10000, 28, 28), containing the test data.
y_test: uint8 NumPy array of labels (integers in range 0-9) with shape (10000,) for the test data.

# Baseline simple neural network
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.metrics import f1_score,precision_score,recall_score
# fix random seed for reproducibility
seed = 1
np.random.seed(seed)
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(10, activation='sigmoid'))
model.summary()
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=5, verbose=2)
results=model.evaluate(X_test, y_test,verbose=2)
print("test loss,", results[0], "test acc:", results[1])
# Train your model and save its history
def plot_loss(loss,test_loss):
plt.figure()
plt.plot(loss)
plt.plot(test_loss)
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
def plot_accuracy(acc,test_acc):
plt.figure()
plt.plot(acc)
plt.plot(test_acc)
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
h_callback = model.fit(X_train, y_train, epochs = 10,
validation_data=(X_val, y_val))
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
# Evaluate your model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)
# function to get precision, recall and f1 score
def get_metrics(model,X_test = X_test):
predictions = np.argmax(model.predict(X_test), axis=-1)
precision = precision_score(y_test_labels, predictions, average='macro')
recall = recall_score(y_test_labels, predictions, average='macro')
f1 = f1_score(y_test_labels, predictions, average='macro')
return precision,recall,f1
precision,recall,f1 = get_metrics(model)
# Dataframe to keep track of all model scores
Model_scores = pd.DataFrame([['Dense 1 layer NN',test_acc,test_loss,precision,recall,f1]],columns=['Model','Accuracy','Loss','Precision','Recall','F1 Score'])
# memory leak
gc.collect()
tf.keras.backend.clear_session()
del model
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_4 (Dense) (None, 128) 100480
dense_5 (Dense) (None, 10) 1290
=================================================================
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1500/1500 - 3s - loss: 0.5225 - accuracy: 0.8159 - 3s/epoch - 2ms/step
Epoch 2/5
1500/1500 - 3s - loss: 0.3938 - accuracy: 0.8588 - 3s/epoch - 2ms/step
Epoch 3/5
1500/1500 - 3s - loss: 0.3510 - accuracy: 0.8719 - 3s/epoch - 2ms/step
Epoch 4/5
1500/1500 - 3s - loss: 0.3258 - accuracy: 0.8801 - 3s/epoch - 2ms/step
Epoch 5/5
1500/1500 - 3s - loss: 0.3025 - accuracy: 0.8906 - 3s/epoch - 2ms/step
313/313 - 1s - loss: 0.3579 - accuracy: 0.8699 - 722ms/epoch - 2ms/step
test loss, 0.35788848996162415 test acc: 0.8698999881744385
Epoch 1/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2898 - accuracy: 0.8930 - val_loss: 0.3166 - val_accuracy: 0.8863
Epoch 2/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2736 - accuracy: 0.9000 - val_loss: 0.3181 - val_accuracy: 0.8860
Epoch 3/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2620 - accuracy: 0.9036 - val_loss: 0.3213 - val_accuracy: 0.8835
Epoch 4/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2543 - accuracy: 0.9056 - val_loss: 0.3195 - val_accuracy: 0.8860
Epoch 5/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2424 - accuracy: 0.9086 - val_loss: 0.3004 - val_accuracy: 0.8939
Epoch 6/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2354 - accuracy: 0.9107 - val_loss: 0.3265 - val_accuracy: 0.8840
Epoch 7/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2265 - accuracy: 0.9168 - val_loss: 0.3079 - val_accuracy: 0.8896
Epoch 8/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2201 - accuracy: 0.9181 - val_loss: 0.3211 - val_accuracy: 0.8892
Epoch 9/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2131 - accuracy: 0.9201 - val_loss: 0.2983 - val_accuracy: 0.8976
Epoch 10/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2071 - accuracy: 0.9233 - val_loss: 0.3196 - val_accuracy: 0.8910
313/313 - 1s - loss: 0.3597 - accuracy: 0.8798 - 629ms/epoch - 2ms/step Test accuracy: 0.879800021648407 313/313 [==============================] - 0s 1ms/step
Baseline model with at least 2 layers which performs slightly better than 1 layer However 2 layered model is clearly overfitted where testing learning curve diverges away from training learning curve and only performs a little better than model with 1 layer
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='sigmoid'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()
h_callback = model.fit(X_train, y_train,epochs = 5,
validation_data=(X_val, y_val))
# Evaluate your model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
precision,recall,f1 = get_metrics(model)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Dense NN 2 layer',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
# memory leak
gc.collect()
tf.keras.backend.clear_session()
del model
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 64) 8256
dense_2 (Dense) (None, 10) 650
=================================================================
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1500/1500 [==============================] - 5s 3ms/step - loss: 0.5229 - accuracy: 0.8130 - val_loss: 0.4052 - val_accuracy: 0.8585
Epoch 2/5
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3848 - accuracy: 0.8598 - val_loss: 0.3574 - val_accuracy: 0.8738
Epoch 3/5
1500/1500 [==============================] - 4s 3ms/step - loss: 0.3474 - accuracy: 0.8720 - val_loss: 0.3573 - val_accuracy: 0.8712
Epoch 4/5
1500/1500 [==============================] - 4s 3ms/step - loss: 0.3200 - accuracy: 0.8803 - val_loss: 0.3325 - val_accuracy: 0.8837
Epoch 5/5
1500/1500 [==============================] - 4s 3ms/step - loss: 0.3029 - accuracy: 0.8877 - val_loss: 0.3439 - val_accuracy: 0.8790
313/313 - 1s - loss: 0.3758 - accuracy: 0.8660 - 684ms/epoch - 2ms/step
313/313 [==============================] - 0s 1ms/step
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='sigmoid'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()
h_callback = model.fit(X_train, y_train,epochs = 10,
validation_data=(X_val, y_val))
# Evaluate your model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
precision,recall,f1 = get_metrics(model)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Dense NN 3 layer',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
pred=model.predict(X_test)
pred=np.argmax(pred,axis=1)
classifation_matrix=confusion_matrix(y_test_label,pred)
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix,annot=True,fmt='d')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
# memory leak
gc.collect()
tf.keras.backend.clear_session()
del model
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 64) 8256
dense_2 (Dense) (None, 32) 2080
dense_3 (Dense) (None, 10) 330
=================================================================
Total params: 111,146
Trainable params: 111,146
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.5295 - accuracy: 0.8121 - val_loss: 0.4245 - val_accuracy: 0.8512
Epoch 2/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3870 - accuracy: 0.8579 - val_loss: 0.3781 - val_accuracy: 0.8669
Epoch 3/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3475 - accuracy: 0.8718 - val_loss: 0.3468 - val_accuracy: 0.8726
Epoch 4/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3246 - accuracy: 0.8790 - val_loss: 0.3460 - val_accuracy: 0.8734
Epoch 5/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3057 - accuracy: 0.8874 - val_loss: 0.3281 - val_accuracy: 0.8808
Epoch 6/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2909 - accuracy: 0.8917 - val_loss: 0.3209 - val_accuracy: 0.8846
Epoch 7/10
1500/1500 [==============================] - 5s 4ms/step - loss: 0.2766 - accuracy: 0.8967 - val_loss: 0.3312 - val_accuracy: 0.8808
Epoch 8/10
1500/1500 [==============================] - 5s 4ms/step - loss: 0.2675 - accuracy: 0.8999 - val_loss: 0.3079 - val_accuracy: 0.8900
Epoch 9/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.2531 - accuracy: 0.9056 - val_loss: 0.3151 - val_accuracy: 0.8895
Epoch 10/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2499 - accuracy: 0.9047 - val_loss: 0.3324 - val_accuracy: 0.8852
313/313 - 1s - loss: 0.3728 - accuracy: 0.8729 - 744ms/epoch - 2ms/step
313/313 [==============================] - 0s 1ms/step 313/313 [==============================] - 0s 1ms/step
pros
cons

Saily Shah — Published On January 27, 2022 and Last Modified On March 15th, 2022 Shah,S Convolutional Neural Network: An Overview Available at: https://towardsdatascience.com/convolutional-neural-network-feature-map-and-filter-visualization-f75012a5a49c[Accessed : 4 november 2022 ]
Convolve ~ combine (one function or series) with another by forming their convolution which is also summing the element-wise product of 2 matrices (Shah,S 2022)

Striding convolutions : repeating what happen above but with strides; the filter moving through the image and getting a matrix of convolved features. The main purpose of doing this is to find features of a class which will then be used for classification of the images
- final matrix(convolved matrix) produced is smaller after retrieving main features of an image
Convolving helps neural network extract features performing better than MLP.
# Functions for plotting
from plotly.subplots import make_subplots
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
def create_trace(x,y,ylabel,color):
trace = go.Scatter(
x = x,y = y,
name=ylabel,
marker=dict(color=color),
mode = "markers+lines",
text=x
)
return trace
def plot_accuracy_and_loss(train_model):
hist = train_model.history
acc = hist['accuracy']
val_acc = hist['val_accuracy']
loss = hist['loss']
val_loss = hist['val_loss']
epochs = list(range(1,len(acc)+1))
trace_ta = create_trace(epochs,acc,"Training accuracy", "Green")
trace_va = create_trace(epochs,val_acc,"Validation accuracy", "Red")
trace_tl = create_trace(epochs,loss,"Training loss", "Blue")
trace_vl = create_trace(epochs,val_loss,"Validation loss", "Magenta")
fig = make_subplots(rows=1,cols=2, subplot_titles=('Training and validation accuracy',
'Training and validation loss'))
fig.append_trace(trace_ta,1,1)
fig.append_trace(trace_va,1,1)
fig.append_trace(trace_tl,1,2)
fig.append_trace(trace_vl,1,2)
fig['layout']['xaxis'].update(title = 'Epoch')
fig['layout']['xaxis2'].update(title = 'Epoch')
fig['layout']['yaxis'].update(title = 'Accuracy', range=[0,1])
fig['layout']['yaxis2'].update(title = 'Loss', range=[0,1])
iplot(fig, filename=f'accuracy-loss_{train_model}')
from tensorflow.keras.layers import Conv2D,Flatten
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.layers import LeakyReLU
epochs = 15
num_classes = 10
seed = np.random.seed(1)
print('before',X_train.shape,X_test.shape)
print('before',y_train.shape,y_test.shape)
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))
X_val = X_val.reshape((X_val.shape[0], 28, 28, 1))
print('after',X_train.shape,X_test.shape)
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs = 10,
validation_data=(X_val, y_val))
# Evaluate your model on the test set
test_loss, test_acc = fashion_model.evaluate(X_test, y_test, verbose=2)
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
# function to get precision, recall and f1 score
def get_metrics(model,X_test):
predictions = np.argmax(model.predict(X_test), axis=-1)
precision = precision_score(y_test_labels, predictions, average='macro')
recall = recall_score(y_test_labels, predictions, average='macro')
f1 = f1_score(y_test_labels, predictions, average='macro')
return precision,recall,f1
# Dataframe to keep track of all model scores
precision,recall,f1 = get_metrics(fashion_model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN linear activation',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
before (48000, 28, 28, 1) (10000, 28, 28, 1)
before (48000, 10) (10000, 10)
after (48000, 28, 28, 1) (10000, 28, 28, 1)
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_6 (Conv2D) (None, 26, 26, 32) 320
conv2d_7 (Conv2D) (None, 24, 24, 64) 18496
flatten_3 (Flatten) (None, 36864) 0
dense_6 (Dense) (None, 128) 4718720
dense_7 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.5588 - accuracy: 0.8115 - val_loss: 0.4796 - val_accuracy: 0.8313
Epoch 2/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.4604 - accuracy: 0.8391 - val_loss: 0.4960 - val_accuracy: 0.8266
Epoch 3/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.4477 - accuracy: 0.8424 - val_loss: 0.4856 - val_accuracy: 0.8360
Epoch 4/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.4396 - accuracy: 0.8465 - val_loss: 0.4795 - val_accuracy: 0.8413
Epoch 5/10
1500/1500 [==============================] - 7s 4ms/step - loss: 0.4344 - accuracy: 0.8472 - val_loss: 0.4729 - val_accuracy: 0.8393
Epoch 6/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.4259 - accuracy: 0.8484 - val_loss: 0.4604 - val_accuracy: 0.8425
Epoch 7/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.4179 - accuracy: 0.8528 - val_loss: 0.4861 - val_accuracy: 0.8350
Epoch 8/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4136 - accuracy: 0.8541 - val_loss: 0.4914 - val_accuracy: 0.8280
Epoch 9/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.4069 - accuracy: 0.8558 - val_loss: 0.4850 - val_accuracy: 0.8398
Epoch 10/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.4044 - accuracy: 0.8560 - val_loss: 0.4952 - val_accuracy: 0.8300
313/313 [==============================] - 0s 1ms/step
#Understand the filters in the model
#Let us pick the first hidden layer as the layer of interest.
layer = fashion_model.layers #Conv layers at 0,
filters, biases = fashion_model.layers[0].get_weights()
print(layer[0].name, filters.shape)
# plot filters
fig1=plt.figure(figsize=(8, 12))
columns = 8
rows = 8
n_filters = 32 ## the number of filters in our first layer
for i in range(1, n_filters + 1):
f = filters[:, :, :, i-1]
fig1 =plt.subplot(rows, columns, i)
fig1.set_xticks([]) #Turn off axis
fig1.set_yticks([])
plt.imshow(f[:, :, 0], cmap='gray') #Show only the filters from 0th channel (R)
#ix += 1
plt.show()
pred=fashion_model.predict(X_test)
pred=np.argmax(pred,axis=1)
classifation_matrix=confusion_matrix(y_test_label,pred)
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix,annot=True,fmt='d')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
# memory leak
gc.collect()
tf.keras.backend.clear_session()
del fashion_model
conv2d_6 (3, 3, 1, 32)
313/313 [==============================] - 0s 1ms/step
Machines understand binary and it makes more sense to one_hot_encode to change it to an array of 0 and 1s instead of a range of 0 to 9
Its amazing how the filter is extracting key information on the various classes, showing the lines edge and features of the images it is looking out for
# baseline cnn model for fashion mnist
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
# evaluate a model using k-fold cross-validation
def evaluate_model(model,dataX, dataY, n_folds=5,valX=X_val,valy=y_val):
scores, histories = list(), list()
# prepare cross validation
kfold = KFold(n_folds, shuffle=True, random_state=1)
# enumerate splits
for train_ix, test_ix in kfold.split(dataX):
# select rows for train and test
trainX, trainY, testX, testY = dataX[train_ix], dataY[train_ix], dataX[test_ix], dataY[test_ix]
h_callback = EarlyStopping(monitor='val_loss', patience=5)
# fit model
history = model.fit(trainX, trainY, epochs=50, validation_data=(testX, testY), verbose=0,callbacks=[h_callback])
# evaluate model
_, acc = model.evaluate(valX, valy, verbose=0)
print('> %.3f' % (acc * 100.0))
# append scores
scores.append(acc)
histories.append(history)
return scores, histories
# plot diagnostic learning curves
def summarize_diagnostics(histories):
for i in range(len(histories)):
# plot loss
pyplot.subplot(211)
pyplot.title('Cross Entropy Loss')
pyplot.plot(histories[i].history['loss'], color='blue', label='train')
pyplot.plot(histories[i].history['val_loss'], color='orange', label='test')
# plot accuracy
pyplot.subplot(212)
pyplot.title('Classification Accuracy')
pyplot.plot(histories[i].history['accuracy'], color='blue', label='train')
pyplot.plot(histories[i].history['val_accuracy'], color='orange', label='test')
pyplot.legend()
pyplot.show()
# summarize model performance
def summarize_performance(scores):
# print summary
print('Accuracy: mean=%.3f std=%.3f, n=%d' % (mean(scores)*100, std(scores)*100, len(scores)))
# box and whisker plots of results
pyplot.boxplot(scores)
pyplot.show()
# run the test harness for evaluating a model
def run_test_harness(model,X_train, y_train):
# evaluate model
scores, histories= evaluate_model(model,X_train, y_train)
# learning curves
summarize_diagnostics(histories)
# summarize estimated performance
summarize_performance(scores)
del model
gc.collect()
tf.keras.backend.clear_session()

Softmax produces a probability score for all 10 classes . If the model is not too confident about the choice it makes , it will be summed up and added to logloss. The more uncertain the model is the higher the logloss

My hypothesis is that max pooling will be better for this dataset as it completely wipes out noise in a region by taking brightest
def model_max_pooling():
# define model
max_pool_model = Sequential()
max_pool_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
max_pool_model.add(MaxPooling2D((2, 2)))
max_pool_model.add(Flatten())
max_pool_model.add(Dense(128, activation='linear'))
max_pool_model.add(Dense(num_classes, activation='softmax'))
# compile model
max_pool_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
return max_pool_model
run_test_harness(model_max_pooling(),X_train, y_train)
model_max_pooling = model_max_pooling()
model_max_pooling.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), verbose=0)
# Evaluate your model on the test set
test_loss, test_acc = model_max_pooling.evaluate(X_test, y_test, verbose=2)
precision,recall,f1 = get_metrics(model_max_pooling,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN linear max pool',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
> 88.983 > 89.092 > 89.317 > 89.108 > 88.300
Accuracy: mean=88.960 std=0.347, n=5
313/313 [==============================] - 0s 1ms/step 313/313 - 1s - loss: 0.7517 - accuracy: 0.8714 - 678ms/epoch - 2ms/step 313/313 [==============================] - 0s 1ms/step
from keras.layers import AveragePooling2D
def model_average_pooling():
# define model
average_pool_model = Sequential()
average_pool_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
average_pool_model.add(AveragePooling2D((2, 2)))
average_pool_model.add(Flatten())
average_pool_model.add(Dense(128, activation='linear'))
average_pool_model.add(Dense(num_classes, activation='softmax'))
# compile model
average_pool_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
return average_pool_model
run_test_harness(model_average_pooling(),X_train, y_train)
model_average_pooling = model_average_pooling()
model_average_pooling.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), verbose=0)
# Evaluate your model on the test set
test_loss, test_acc = model_average_pooling.evaluate(X_test, y_test, verbose=2)
precision,recall,f1 = get_metrics(model_average_pooling,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN linear avg pool',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
> 84.092 > 84.442 > 84.925 > 84.667 > 84.958
Accuracy: mean=84.617 std=0.323, n=5
313/313 [==============================] - 0s 1ms/step 313/313 - 1s - loss: 0.4898 - accuracy: 0.8306 - 689ms/epoch - 2ms/step 313/313 [==============================] - 0s 1ms/step
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs = 10, batch_size=32,validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_5"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_3 (Conv2D) (None, 26, 26, 32) 320
conv2d_4 (Conv2D) (None, 24, 24, 64) 18496
flatten_2 (Flatten) (None, 36864) 0
dense_13 (Dense) (None, 128) 4718720
dense_14 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4044 - accuracy: 0.8573 - val_loss: 0.4900 - val_accuracy: 0.8323
Epoch 2/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.4011 - accuracy: 0.8571 - val_loss: 0.4897 - val_accuracy: 0.8276
Epoch 3/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.3987 - accuracy: 0.8595 - val_loss: 0.5025 - val_accuracy: 0.8269
Epoch 4/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.3967 - accuracy: 0.8582 - val_loss: 0.5149 - val_accuracy: 0.8179
Epoch 5/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.3931 - accuracy: 0.8607 - val_loss: 0.5014 - val_accuracy: 0.8309
Epoch 6/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.3912 - accuracy: 0.8608 - val_loss: 0.4800 - val_accuracy: 0.8334
Epoch 7/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.3891 - accuracy: 0.8614 - val_loss: 0.4854 - val_accuracy: 0.8302
Epoch 8/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.3921 - accuracy: 0.8599 - val_loss: 0.4847 - val_accuracy: 0.8329
Epoch 9/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.3893 - accuracy: 0.8617 - val_loss: 0.4951 - val_accuracy: 0.8262
Epoch 10/10
1500/1500 [==============================] - 7s 5ms/step - loss: 0.3852 - accuracy: 0.8624 - val_loss: 0.5268 - val_accuracy: 0.8257
CPU times: total: 1min 37s Wall time: 1min 15s
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs = 10, batch_size=64,validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_6"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_5 (Conv2D) (None, 26, 26, 32) 320
conv2d_6 (Conv2D) (None, 24, 24, 64) 18496
flatten_3 (Flatten) (None, 36864) 0
dense_15 (Dense) (None, 128) 4718720
dense_16 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
750/750 [==============================] - 5s 6ms/step - loss: 0.3619 - accuracy: 0.8717 - val_loss: 0.4855 - val_accuracy: 0.8311
Epoch 2/10
750/750 [==============================] - 4s 6ms/step - loss: 0.3638 - accuracy: 0.8709 - val_loss: 0.4794 - val_accuracy: 0.8317
Epoch 3/10
750/750 [==============================] - 4s 6ms/step - loss: 0.3648 - accuracy: 0.8699 - val_loss: 0.4961 - val_accuracy: 0.8271
Epoch 4/10
750/750 [==============================] - 5s 6ms/step - loss: 0.3670 - accuracy: 0.8689 - val_loss: 0.4937 - val_accuracy: 0.8251
Epoch 5/10
750/750 [==============================] - 4s 6ms/step - loss: 0.3669 - accuracy: 0.8692 - val_loss: 0.4857 - val_accuracy: 0.8319
Epoch 6/10
750/750 [==============================] - 4s 6ms/step - loss: 0.3645 - accuracy: 0.8705 - val_loss: 0.4941 - val_accuracy: 0.8298
Epoch 7/10
750/750 [==============================] - 4s 6ms/step - loss: 0.3649 - accuracy: 0.8705 - val_loss: 0.4938 - val_accuracy: 0.8333
Epoch 8/10
750/750 [==============================] - 4s 6ms/step - loss: 0.3643 - accuracy: 0.8692 - val_loss: 0.4871 - val_accuracy: 0.8324
Epoch 9/10
750/750 [==============================] - 4s 6ms/step - loss: 0.3638 - accuracy: 0.8690 - val_loss: 0.4934 - val_accuracy: 0.8295
Epoch 10/10
750/750 [==============================] - 4s 6ms/step - loss: 0.3638 - accuracy: 0.8693 - val_loss: 0.4873 - val_accuracy: 0.8319
CPU times: total: 1min 2s Wall time: 45.4 s
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs = 10, batch_size=128,validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_7 (Conv2D) (None, 26, 26, 32) 320
conv2d_8 (Conv2D) (None, 24, 24, 64) 18496
flatten_4 (Flatten) (None, 36864) 0
dense_17 (Dense) (None, 128) 4718720
dense_18 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3467 - accuracy: 0.8764 - val_loss: 0.4857 - val_accuracy: 0.8328
Epoch 2/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3448 - accuracy: 0.8770 - val_loss: 0.4900 - val_accuracy: 0.8317
Epoch 3/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3468 - accuracy: 0.8762 - val_loss: 0.4944 - val_accuracy: 0.8303
Epoch 4/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3462 - accuracy: 0.8765 - val_loss: 0.4937 - val_accuracy: 0.8333
Epoch 5/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3479 - accuracy: 0.8752 - val_loss: 0.5008 - val_accuracy: 0.8324
Epoch 6/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3504 - accuracy: 0.8750 - val_loss: 0.5011 - val_accuracy: 0.8283
Epoch 7/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3507 - accuracy: 0.8730 - val_loss: 0.4889 - val_accuracy: 0.8309
Epoch 8/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3486 - accuracy: 0.8752 - val_loss: 0.4947 - val_accuracy: 0.8334
Epoch 9/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3495 - accuracy: 0.8743 - val_loss: 0.4977 - val_accuracy: 0.8324
Epoch 10/10
375/375 [==============================] - 3s 7ms/step - loss: 0.3487 - accuracy: 0.8751 - val_loss: 0.4996 - val_accuracy: 0.8286
CPU times: total: 36.8 s Wall time: 26.9 s
The batch size is a hyperparameter that defines the number of samples to work through before updating the internal model parameters. Instead of having to update only after the whole dataset is trained once. It is indeed better to update after training each batch.
smaller batch_size is will take more time though¶
# evaluate a model using k-fold cross-validation
def evaluate_model(model,dataX, dataY, n_folds=5, valX=X_val, valY=y_val):
scores, histories = list(), list()
# prepare cross validation
kfold = KFold(n_folds, shuffle=True, random_state=1)
# enumerate splits
for train_ix, test_ix in kfold.split(dataX):
# select rows for train and test
trainX, trainY, testX, testY = dataX[train_ix], dataY[train_ix], dataX[test_ix], dataY[test_ix]
# fit model
history = model.fit(trainX, trainY, epochs=100, batch_size=128,validation_data=(testX, testY), verbose=0, callbacks=[EarlyStopping(monitor='val_loss', patience=10)])
# evaluate model
_, acc = model.evaluate(valX, valY, verbose=0)
print('> %.3f' % (acc * 100.0))
# append scores
scores.append(acc)
histories.append(history)
return scores, histories
%%time
from keras.layers import BatchNormalization
fashion_model_batchnorm = Sequential()
fashion_model_batchnorm.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batchnorm.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batchnorm.add(BatchNormalization())
fashion_model_batchnorm.add(Flatten())
fashion_model_batchnorm.add(Dense(128, activation='linear'))
fashion_model_batchnorm.add(Dense(num_classes, activation='softmax'))
fashion_model_batchnorm.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batchnorm.summary()
h_callback = fashion_model_batchnorm.fit(X_train, y_train, epochs = 10, batch_size=64,validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
test_loss, test_acc = fashion_model_batchnorm.evaluate(X_test, y_test, verbose=2)
precision,recall,f1 = get_metrics(fashion_model_batchnorm,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN linear w batch_norm',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
gc.collect()
tf.keras.backend.clear_session()
del fashion_model_batchnorm
Model: "sequential_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_8 (Conv2D) (None, 26, 26, 32) 320
conv2d_9 (Conv2D) (None, 24, 24, 64) 18496
batch_normalization (BatchN (None, 24, 24, 64) 256
ormalization)
flatten_8 (Flatten) (None, 36864) 0
dense_16 (Dense) (None, 128) 4718720
dense_17 (Dense) (None, 10) 1290
=================================================================
Total params: 4,739,082
Trainable params: 4,738,954
Non-trainable params: 128
_________________________________________________________________
Epoch 1/10
750/750 [==============================] - 6s 7ms/step - loss: 2.1360 - accuracy: 0.7822 - val_loss: 0.5830 - val_accuracy: 0.7956
Epoch 2/10
750/750 [==============================] - 5s 7ms/step - loss: 0.5115 - accuracy: 0.8251 - val_loss: 0.9189 - val_accuracy: 0.7214
Epoch 3/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4802 - accuracy: 0.8329 - val_loss: 0.5599 - val_accuracy: 0.8084
Epoch 4/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4550 - accuracy: 0.8399 - val_loss: 0.6675 - val_accuracy: 0.7655
Epoch 5/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4488 - accuracy: 0.8403 - val_loss: 0.6916 - val_accuracy: 0.7657
Epoch 6/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4460 - accuracy: 0.8434 - val_loss: 0.5300 - val_accuracy: 0.8194
Epoch 7/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4417 - accuracy: 0.8478 - val_loss: 1.0152 - val_accuracy: 0.7133
Epoch 8/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4381 - accuracy: 0.8459 - val_loss: 0.9005 - val_accuracy: 0.7064
Epoch 9/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4301 - accuracy: 0.8474 - val_loss: 0.4910 - val_accuracy: 0.8295
Epoch 10/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4216 - accuracy: 0.8530 - val_loss: 0.6363 - val_accuracy: 0.7676
313/313 - 1s - loss: 0.6363 - accuracy: 0.7676 - 797ms/epoch - 3ms/step 313/313 [==============================] - 1s 1ms/step CPU times: total: 1min 8s Wall time: 52.8 s
Since it regularized on an underfitted model I did not expect it to perform as well as the model without batch normalization but it proves that it can help with overfitting and speedier training
%%time
## Importing data and augmenting it
import keras
from keras.utils import to_categorical
from keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train , X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=13, stratify=y_train)
print(X_train.shape, X_val.shape, X_test.shape)
X_train = X_train / 255.0
X_test = X_test / 255.0
X_val = X_val / 255.0
print('before',X_train.shape,X_test.shape)
print('before',y_train.shape,y_test.shape)
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))
X_val = X_val.reshape((X_val.shape[0], 28, 28, 1))
y_test_label = y_test
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_val = to_categorical(y_val)
# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(4,4))
for i in range(3):
for j in range(3):
ax[i][j].imshow(X_train[i*3+j], cmap=plt.get_cmap("gray"))
# show the plot
plt.show()
from keras.layers import BatchNormalization
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(BatchNormalization())
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(10, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
h_callback = fashion_model_batch.fit(X_train, y_train, epochs = 100, batch_size=128,validation_data=(X_val, y_val),callbacks=[early_stopping])
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
# evaluate on test set
score = fashion_model_batch.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
precision,recall,f1 = get_metrics(fashion_model_batch,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['linear batchnorm high epochs w rotated data aug',score[1],score[0],precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
del fashion_model_batch
gc.collect()
tf.keras.backend.clear_session()
(48000, 28, 28) (12000, 28, 28) (10000, 28, 28) before (48000, 28, 28) (10000, 28, 28) before (48000,) (10000,)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
conv2d_1 (Conv2D) (None, 24, 24, 64) 18496
batch_normalization (BatchN (None, 24, 24, 64) 256
ormalization)
flatten (Flatten) (None, 36864) 0
dense (Dense) (None, 128) 4718720
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 4,739,082
Trainable params: 4,738,954
Non-trainable params: 128
_________________________________________________________________
Epoch 1/100
375/375 [==============================] - 4s 9ms/step - loss: 2.5341 - accuracy: 0.7707 - val_loss: 1.1475 - val_accuracy: 0.5939
Epoch 2/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4980 - accuracy: 0.8326 - val_loss: 0.6398 - val_accuracy: 0.7830
Epoch 3/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4690 - accuracy: 0.8378 - val_loss: 1.0912 - val_accuracy: 0.6787
Epoch 4/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4521 - accuracy: 0.8427 - val_loss: 0.7169 - val_accuracy: 0.7663
Epoch 5/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4418 - accuracy: 0.8459 - val_loss: 1.0841 - val_accuracy: 0.6903
Epoch 6/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4328 - accuracy: 0.8464 - val_loss: 0.5979 - val_accuracy: 0.7978
Epoch 7/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4262 - accuracy: 0.8494 - val_loss: 0.7216 - val_accuracy: 0.7633
Epoch 8/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4192 - accuracy: 0.8513 - val_loss: 0.5108 - val_accuracy: 0.8249
Epoch 9/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4162 - accuracy: 0.8536 - val_loss: 0.5037 - val_accuracy: 0.8358
Epoch 10/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4169 - accuracy: 0.8522 - val_loss: 0.5058 - val_accuracy: 0.8331
Epoch 11/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4128 - accuracy: 0.8536 - val_loss: 0.4757 - val_accuracy: 0.8393
Epoch 12/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4125 - accuracy: 0.8547 - val_loss: 0.4927 - val_accuracy: 0.8343
Epoch 13/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4134 - accuracy: 0.8544 - val_loss: 0.4953 - val_accuracy: 0.8302
Epoch 14/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4111 - accuracy: 0.8540 - val_loss: 0.5814 - val_accuracy: 0.8126
Epoch 15/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4101 - accuracy: 0.8544 - val_loss: 0.5141 - val_accuracy: 0.8245
Epoch 16/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4068 - accuracy: 0.8571 - val_loss: 0.4677 - val_accuracy: 0.8415
Epoch 17/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4034 - accuracy: 0.8572 - val_loss: 0.4977 - val_accuracy: 0.8340
Epoch 18/100
375/375 [==============================] - 3s 8ms/step - loss: 0.4060 - accuracy: 0.8572 - val_loss: 0.6110 - val_accuracy: 0.7789
Epoch 19/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4024 - accuracy: 0.8565 - val_loss: 0.5666 - val_accuracy: 0.8026
Epoch 20/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3987 - accuracy: 0.8593 - val_loss: 0.5318 - val_accuracy: 0.8158
Epoch 21/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3944 - accuracy: 0.8590 - val_loss: 0.4806 - val_accuracy: 0.8331
Epoch 22/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3932 - accuracy: 0.8592 - val_loss: 0.5156 - val_accuracy: 0.8118
Epoch 23/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3877 - accuracy: 0.8622 - val_loss: 0.5023 - val_accuracy: 0.8220
Epoch 24/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3862 - accuracy: 0.8624 - val_loss: 0.4759 - val_accuracy: 0.8443
Epoch 25/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3865 - accuracy: 0.8627 - val_loss: 0.5733 - val_accuracy: 0.7953
Epoch 26/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3841 - accuracy: 0.8626 - val_loss: 0.4848 - val_accuracy: 0.8390
Epoch 27/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3833 - accuracy: 0.8646 - val_loss: 0.4702 - val_accuracy: 0.8381
Epoch 28/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3787 - accuracy: 0.8645 - val_loss: 0.4645 - val_accuracy: 0.8437
Epoch 29/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3779 - accuracy: 0.8662 - val_loss: 0.4568 - val_accuracy: 0.8427
Epoch 30/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3772 - accuracy: 0.8648 - val_loss: 0.4648 - val_accuracy: 0.8426
Epoch 31/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3733 - accuracy: 0.8664 - val_loss: 0.4517 - val_accuracy: 0.8487
Epoch 32/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3753 - accuracy: 0.8673 - val_loss: 0.4605 - val_accuracy: 0.8470
Epoch 33/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3724 - accuracy: 0.8677 - val_loss: 0.4524 - val_accuracy: 0.8514
Epoch 34/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3705 - accuracy: 0.8675 - val_loss: 0.4494 - val_accuracy: 0.8473
Epoch 35/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3714 - accuracy: 0.8672 - val_loss: 0.4557 - val_accuracy: 0.8455
Epoch 36/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3680 - accuracy: 0.8693 - val_loss: 0.4484 - val_accuracy: 0.8495
Epoch 37/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3684 - accuracy: 0.8691 - val_loss: 0.4621 - val_accuracy: 0.8446
Epoch 38/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3672 - accuracy: 0.8689 - val_loss: 0.4680 - val_accuracy: 0.8455
Epoch 39/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3656 - accuracy: 0.8687 - val_loss: 0.4638 - val_accuracy: 0.8465
Epoch 40/100
375/375 [==============================] - 4s 9ms/step - loss: 0.3653 - accuracy: 0.8692 - val_loss: 0.4557 - val_accuracy: 0.8479
Epoch 41/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3648 - accuracy: 0.8686 - val_loss: 0.4570 - val_accuracy: 0.8462
Epoch 42/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3646 - accuracy: 0.8708 - val_loss: 0.4784 - val_accuracy: 0.8407
Epoch 43/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3625 - accuracy: 0.8721 - val_loss: 0.4663 - val_accuracy: 0.8443
Epoch 44/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3627 - accuracy: 0.8706 - val_loss: 0.4626 - val_accuracy: 0.8470
Epoch 45/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3594 - accuracy: 0.8705 - val_loss: 0.4520 - val_accuracy: 0.8493
Epoch 46/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3609 - accuracy: 0.8718 - val_loss: 0.4640 - val_accuracy: 0.8438
Epoch 47/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3600 - accuracy: 0.8712 - val_loss: 0.4605 - val_accuracy: 0.8478
Epoch 48/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3595 - accuracy: 0.8702 - val_loss: 0.4573 - val_accuracy: 0.8490
Epoch 49/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3586 - accuracy: 0.8721 - val_loss: 0.4611 - val_accuracy: 0.8447
Epoch 50/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3587 - accuracy: 0.8711 - val_loss: 0.4745 - val_accuracy: 0.8415
Epoch 51/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3580 - accuracy: 0.8712 - val_loss: 0.4825 - val_accuracy: 0.8475
Epoch 52/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3592 - accuracy: 0.8715 - val_loss: 0.4719 - val_accuracy: 0.8460
Epoch 53/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3570 - accuracy: 0.8726 - val_loss: 0.4756 - val_accuracy: 0.8355
Epoch 54/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3547 - accuracy: 0.8720 - val_loss: 0.4782 - val_accuracy: 0.8405
Epoch 55/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3555 - accuracy: 0.8727 - val_loss: 0.4669 - val_accuracy: 0.8477
Epoch 56/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3560 - accuracy: 0.8719 - val_loss: 0.4704 - val_accuracy: 0.8468
Epoch 56: early stopping
Test loss: 0.4942406117916107 Test accuracy: 0.8303999900817871 313/313 [==============================] - 1s 1ms/step CPU times: total: 3min 57s Wall time: 2min 57s
Model_scores
| Model | Accuracy | Loss | Precision | Recall | F1 Score | |
|---|---|---|---|---|---|---|
| 0 | Dense 1 layer NN | 0.8798 | 0.359683 | 0.880470 | 0.8798 | 0.878955 |
| 1 | Dense NN 2 layer | 0.8798 | 0.359683 | 0.867431 | 0.8660 | 0.863119 |
| 2 | Dense NN 3 layer | 0.8798 | 0.359683 | 0.876298 | 0.8727 | 0.872353 |
| 3 | CNN linear activation | 0.8798 | 0.359683 | 0.824820 | 0.8181 | 0.817135 |
| 4 | CNN linear max pool | 0.8714 | 0.751681 | 0.871716 | 0.8714 | 0.871052 |
| 5 | CNN linear avg pool | 0.8306 | 0.489766 | 0.834260 | 0.8306 | 0.831207 |
| 6 | CNN linear avg pool | 0.7676 | 0.636280 | 0.800908 | 0.7676 | 0.763595 |
| 7 | linear batchnorm high epochs w data aug | 0.7949 | 0.586244 | 0.807667 | 0.7949 | 0.794490 |
| 8 | linear batchnorm high epochs w data aug | 0.8304 | 0.494241 | 0.832257 | 0.8304 | 0.830593 |
X_train_rotate = X_train.copy()
y_train_rotate = y_train.copy()
# define data preparation
datagen = ImageDataGenerator(rotation_range=90)
# fit parameters from data
datagen.fit(X_train_rotate)
# configure batch size and retrieve one batch of images
for X_batch, y_batch in datagen.flow(X_train_rotate, y_train_rotate, batch_size=9, shuffle=False):
# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(4,4))
for i in range(3):
for j in range(3):
ax[i][j].imshow(X_batch[i*3+j], cmap=plt.get_cmap("gray"))
# show the plot
plt.show()
break
X_train_rotated = datagen.flow(X_train_rotate, y_train_rotate, batch_size=X_train_rotate.shape[0], shuffle=False).next()
X_train_rotated = X_train_rotated[0]
# merge rotated data with original
X_train_rot = np.concatenate((X_train,X_train_rotated),axis=0)
y_train_rot = np.concatenate((y_train,y_train_rotate),axis=0)
print('after',X_train.shape,X_test.shape)
print('after',y_train.shape,y_test.shape)
plt.imshow(X_train_rot[12])
plt.show()
after (48000, 28, 28, 1) (10000, 28, 28, 1) after (48000, 10) (10000, 10)
X_train_shift = X_train.copy()
y_train_shift = y_train.copy()
# define data preparation
datagen = ImageDataGenerator(width_shift_range=[-2,2], height_shift_range=[-2,2])
# fit parameters from data
datagen.fit(X_train_shift)
# configure batch size and retrieve one batch of images
for X_batch, y_batch in datagen.flow(X_train_shift, y_train_shift, batch_size=9, shuffle=False):
# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(4,4))
for i in range(3):
for j in range(3):
ax[i][j].imshow(X_batch[i*3+j], cmap=plt.get_cmap("gray"))
# show the plot
plt.show()
break
X_train_shifted = datagen.flow(X_train_shift, y_train_shift, batch_size=X_train_shift.shape[0], shuffle=False).next()
X_train_shifted = X_train_shifted[0]
# merge rotated data with original
X_train_shif = np.concatenate((X_train,X_train_shifted),axis=0)
y_train_shif = np.concatenate((y_train,y_train_shift),axis=0)
print('after',X_train.shape,X_test.shape)
print('after',y_train.shape,y_test.shape)
after (48000, 28, 28, 1) (10000, 28, 28, 1) after (48000, 10) (10000, 10)
X_train_flip = X_train.copy()
y_train_flip = y_train.copy()
# define data preparation
datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True)
# fit parameters from data
datagen.fit(X_train_flip)
# configure batch size and retrieve one batch of images
for X_batch, y_batch in datagen.flow(X_train_flip, y_train_flip, batch_size=9, shuffle=False):
# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(4,4))
for i in range(3):
for j in range(3):
ax[i][j].imshow(X_batch[i*3+j], cmap=plt.get_cmap("gray"))
# show the plot
plt.show()
break
X_train_flipped = datagen.flow(X_train_flip, y_train_flip, batch_size=X_train_flip.shape[0], shuffle=False).next()
X_train_flipped = X_train_flipped[0]
# merge flipped data with original
X_train_flips = np.concatenate((X_train,X_train_flipped),axis=0)
y_train_flips = np.concatenate((y_train,y_train_flip),axis=0)
print('after',X_train.shape,X_test.shape)
print('after',y_train.shape,y_test.shape)
after (48000, 28, 28, 1) (10000, 28, 28, 1) after (48000, 10) (10000, 10)
# concat flip shift and rotate
X_train = np.concatenate((X_train,X_train_shifted,X_train_rotated),axis=0)
y_train = np.concatenate((y_train,y_train_shift,y_train_rotate),axis=0)
print('total data points after adding all:',X_train.shape,X_test.shape)
total data points after adding all: (144000, 28, 28, 1) (10000, 28, 28, 1)

from tensorflow.keras.layers import Conv2D,Flatten
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.layers import LeakyReLU
def model_relu():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
return fashion_model
run_test_harness(model_relu(),X_train, y_train)
> 88.700 > 89.125 > 89.200 > 88.292 > 88.617
Accuracy: mean=88.787 std=0.337, n=5
def model_tanh():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='tanh',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='tanh'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='tanh'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_tanh(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 87.983
> 87.567
> 87.608
> 87.567
> 86.800
Accuracy: mean=87.505 std=0.386, n=5
from keras.layers import LeakyReLU
# fix random seed for reproducibility
def model_leaky_relu():
seed = 1
np.random.seed(seed)
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D((2, 2),padding='same'))
fashion_model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_leaky_relu(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 28, 28, 32) 320
leaky_re_lu (LeakyReLU) (None, 28, 28, 32) 0
max_pooling2d (MaxPooling2D (None, 14, 14, 32) 0
)
conv2d_1 (Conv2D) (None, 14, 14, 64) 18496
leaky_re_lu_1 (LeakyReLU) (None, 14, 14, 64) 0
max_pooling2d_1 (MaxPooling (None, 7, 7, 64) 0
2D)
conv2d_2 (Conv2D) (None, 7, 7, 128) 73856
leaky_re_lu_2 (LeakyReLU) (None, 7, 7, 128) 0
max_pooling2d_2 (MaxPooling (None, 4, 4, 128) 0
2D)
flatten (Flatten) (None, 2048) 0
dense (Dense) (None, 128) 262272
leaky_re_lu_3 (LeakyReLU) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 356,234
Trainable params: 356,234
Non-trainable params: 0
_________________________________________________________________
> 91.667
> 91.175
> 91.533
> 92.150
> 91.842
Accuracy: mean=91.673 std=0.324, n=5
def model_selu():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='selu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='selu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='selu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='selu'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_selu(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 88.333
> 87.825
> 87.883
> 88.117
> 87.750
Accuracy: mean=87.982 std=0.214, n=5
def model_elu():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='elu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='elu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='elu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='elu'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_elu(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 89.058
> 88.867
> 88.708
> 88.750
> 88.450
Accuracy: mean=88.767 std=0.200, n=5
Dropout is randomly removing neurons in a layer but when predicting neurons is added back in. How can something random help in AI we may wonder...

In the picture above,if a bad node is blocked it helps the training process. However if a good node is blocked , the neural network can still learn from a separate representation of the data though not the best but its still fine
As our accuracy goes above 90 percent , we may tend to overfit and here we will try drop out to see if it can regularize and perhaps even improve validation scores
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
fashion_model.fit(X_train, y_train, batch_size=32,epochs=100,verbose=1,validation_data=(X_test, y_test))
test_loss, test_acc = fashion_model.evaluate(X_test, y_test)
# Dataframe to keep track of all model scores
precision,recall,f1 = get_metrics(fashion_model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN relu adam w dropout',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
4500/4500 [==============================] - 21s 4ms/step - loss: 0.7534 - accuracy: 0.7231 - val_loss: 0.4656 - val_accuracy: 0.8283
Epoch 2/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.5105 - accuracy: 0.8129 - val_loss: 0.3734 - val_accuracy: 0.8634
Epoch 3/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.4416 - accuracy: 0.8388 - val_loss: 0.3466 - val_accuracy: 0.8722
Epoch 4/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.4011 - accuracy: 0.8520 - val_loss: 0.3361 - val_accuracy: 0.8773
Epoch 5/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.3733 - accuracy: 0.8621 - val_loss: 0.3443 - val_accuracy: 0.8744
Epoch 6/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.3526 - accuracy: 0.8691 - val_loss: 0.3259 - val_accuracy: 0.8839
Epoch 7/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.3315 - accuracy: 0.8766 - val_loss: 0.3329 - val_accuracy: 0.8788
Epoch 8/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.3170 - accuracy: 0.8809 - val_loss: 0.3362 - val_accuracy: 0.8844
Epoch 9/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.3025 - accuracy: 0.8868 - val_loss: 0.3376 - val_accuracy: 0.8863
Epoch 10/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.2893 - accuracy: 0.8912 - val_loss: 0.3473 - val_accuracy: 0.8872
Epoch 11/100
4500/4500 [==============================] - 21s 5ms/step - loss: 0.2809 - accuracy: 0.8937 - val_loss: 0.3512 - val_accuracy: 0.8828
Epoch 12/100
4500/4500 [==============================] - 21s 5ms/step - loss: 0.2695 - accuracy: 0.8978 - val_loss: 0.3476 - val_accuracy: 0.8848
Epoch 13/100
4500/4500 [==============================] - 20s 5ms/step - loss: 0.2627 - accuracy: 0.8996 - val_loss: 0.3680 - val_accuracy: 0.8843
Epoch 14/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2538 - accuracy: 0.9033 - val_loss: 0.3809 - val_accuracy: 0.8865
Epoch 15/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2457 - accuracy: 0.9062 - val_loss: 0.3989 - val_accuracy: 0.8824
Epoch 16/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2389 - accuracy: 0.9083 - val_loss: 0.3760 - val_accuracy: 0.8883
Epoch 17/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2316 - accuracy: 0.9116 - val_loss: 0.4174 - val_accuracy: 0.8823
Epoch 18/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2262 - accuracy: 0.9130 - val_loss: 0.3991 - val_accuracy: 0.8832
Epoch 19/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2204 - accuracy: 0.9146 - val_loss: 0.4060 - val_accuracy: 0.8866
Epoch 20/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2160 - accuracy: 0.9166 - val_loss: 0.4284 - val_accuracy: 0.8846
Epoch 21/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2100 - accuracy: 0.9193 - val_loss: 0.4509 - val_accuracy: 0.8822
Epoch 22/100
4500/4500 [==============================] - 20s 5ms/step - loss: 0.2057 - accuracy: 0.9206 - val_loss: 0.4527 - val_accuracy: 0.8835
Epoch 23/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2022 - accuracy: 0.9216 - val_loss: 0.4362 - val_accuracy: 0.8830
Epoch 24/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.2007 - accuracy: 0.9232 - val_loss: 0.4498 - val_accuracy: 0.8851
Epoch 25/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1950 - accuracy: 0.9255 - val_loss: 0.4800 - val_accuracy: 0.8816
Epoch 26/100
4500/4500 [==============================] - 20s 5ms/step - loss: 0.1918 - accuracy: 0.9267 - val_loss: 0.5168 - val_accuracy: 0.8795
Epoch 27/100
4500/4500 [==============================] - 21s 5ms/step - loss: 0.1868 - accuracy: 0.9277 - val_loss: 0.4827 - val_accuracy: 0.8806
Epoch 28/100
4500/4500 [==============================] - 21s 5ms/step - loss: 0.1826 - accuracy: 0.9300 - val_loss: 0.4961 - val_accuracy: 0.8842
Epoch 29/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1815 - accuracy: 0.9301 - val_loss: 0.4952 - val_accuracy: 0.8862
Epoch 30/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1785 - accuracy: 0.9310 - val_loss: 0.5499 - val_accuracy: 0.8836
Epoch 31/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1761 - accuracy: 0.9316 - val_loss: 0.5208 - val_accuracy: 0.8778
Epoch 32/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1721 - accuracy: 0.9327 - val_loss: 0.5408 - val_accuracy: 0.8829
Epoch 33/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1738 - accuracy: 0.9333 - val_loss: 0.5526 - val_accuracy: 0.8753
Epoch 34/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1663 - accuracy: 0.9361 - val_loss: 0.5553 - val_accuracy: 0.8837
Epoch 35/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1659 - accuracy: 0.9361 - val_loss: 0.5468 - val_accuracy: 0.8786
Epoch 36/100
4500/4500 [==============================] - 20s 5ms/step - loss: 0.1655 - accuracy: 0.9361 - val_loss: 0.5633 - val_accuracy: 0.8835
Epoch 37/100
4500/4500 [==============================] - 21s 5ms/step - loss: 0.1609 - accuracy: 0.9377 - val_loss: 0.5950 - val_accuracy: 0.8859
Epoch 38/100
4500/4500 [==============================] - 20s 5ms/step - loss: 0.1567 - accuracy: 0.9389 - val_loss: 0.6147 - val_accuracy: 0.8790
Epoch 39/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1586 - accuracy: 0.9387 - val_loss: 0.5949 - val_accuracy: 0.8825
Epoch 40/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1575 - accuracy: 0.9402 - val_loss: 0.5934 - val_accuracy: 0.8839
Epoch 41/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1543 - accuracy: 0.9409 - val_loss: 0.6298 - val_accuracy: 0.8824
Epoch 42/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1530 - accuracy: 0.9410 - val_loss: 0.6582 - val_accuracy: 0.8807
Epoch 43/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1520 - accuracy: 0.9413 - val_loss: 0.6536 - val_accuracy: 0.8844
Epoch 44/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1515 - accuracy: 0.9423 - val_loss: 0.6202 - val_accuracy: 0.8798
Epoch 45/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1465 - accuracy: 0.9435 - val_loss: 0.6595 - val_accuracy: 0.8795
Epoch 46/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1485 - accuracy: 0.9431 - val_loss: 0.6554 - val_accuracy: 0.8775
Epoch 47/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1441 - accuracy: 0.9444 - val_loss: 0.6576 - val_accuracy: 0.8797
Epoch 48/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1458 - accuracy: 0.9445 - val_loss: 0.6790 - val_accuracy: 0.8865
Epoch 49/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1440 - accuracy: 0.9444 - val_loss: 0.7747 - val_accuracy: 0.8771
Epoch 50/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1408 - accuracy: 0.9464 - val_loss: 0.7076 - val_accuracy: 0.8804
Epoch 51/100
4500/4500 [==============================] - 20s 5ms/step - loss: 0.1418 - accuracy: 0.9461 - val_loss: 0.6757 - val_accuracy: 0.8751
Epoch 52/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1366 - accuracy: 0.9478 - val_loss: 0.7108 - val_accuracy: 0.8836
Epoch 53/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1385 - accuracy: 0.9475 - val_loss: 0.7257 - val_accuracy: 0.8767
Epoch 54/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1352 - accuracy: 0.9481 - val_loss: 0.7150 - val_accuracy: 0.8800
Epoch 55/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1366 - accuracy: 0.9478 - val_loss: 0.7608 - val_accuracy: 0.8832
Epoch 56/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1362 - accuracy: 0.9486 - val_loss: 0.7308 - val_accuracy: 0.8822
Epoch 57/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1348 - accuracy: 0.9487 - val_loss: 0.7269 - val_accuracy: 0.8803
Epoch 58/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1304 - accuracy: 0.9502 - val_loss: 0.7414 - val_accuracy: 0.8784
Epoch 59/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1314 - accuracy: 0.9498 - val_loss: 0.7500 - val_accuracy: 0.8811
Epoch 60/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1287 - accuracy: 0.9507 - val_loss: 0.8236 - val_accuracy: 0.8758
Epoch 61/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1300 - accuracy: 0.9509 - val_loss: 0.8009 - val_accuracy: 0.8823
Epoch 62/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1309 - accuracy: 0.9511 - val_loss: 0.7289 - val_accuracy: 0.8786
Epoch 63/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1295 - accuracy: 0.9510 - val_loss: 0.7752 - val_accuracy: 0.8805
Epoch 64/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1255 - accuracy: 0.9524 - val_loss: 0.8147 - val_accuracy: 0.8810
Epoch 65/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1249 - accuracy: 0.9526 - val_loss: 0.8406 - val_accuracy: 0.8784
Epoch 66/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1242 - accuracy: 0.9530 - val_loss: 0.8390 - val_accuracy: 0.8789
Epoch 67/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1262 - accuracy: 0.9520 - val_loss: 0.8118 - val_accuracy: 0.8736
Epoch 68/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1220 - accuracy: 0.9538 - val_loss: 0.8291 - val_accuracy: 0.8783
Epoch 69/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1242 - accuracy: 0.9533 - val_loss: 0.8040 - val_accuracy: 0.8773
Epoch 70/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1229 - accuracy: 0.9539 - val_loss: 0.8555 - val_accuracy: 0.8795
Epoch 71/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1200 - accuracy: 0.9552 - val_loss: 0.9014 - val_accuracy: 0.8751
Epoch 72/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1184 - accuracy: 0.9553 - val_loss: 0.8546 - val_accuracy: 0.8770
Epoch 73/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1225 - accuracy: 0.9547 - val_loss: 0.8713 - val_accuracy: 0.8821
Epoch 74/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1215 - accuracy: 0.9548 - val_loss: 0.8291 - val_accuracy: 0.8807
Epoch 75/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1177 - accuracy: 0.9559 - val_loss: 0.8788 - val_accuracy: 0.8781
Epoch 76/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1196 - accuracy: 0.9558 - val_loss: 0.8599 - val_accuracy: 0.8795
Epoch 77/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1157 - accuracy: 0.9568 - val_loss: 0.9226 - val_accuracy: 0.8763
Epoch 78/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1182 - accuracy: 0.9560 - val_loss: 0.8871 - val_accuracy: 0.8748
Epoch 79/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1170 - accuracy: 0.9568 - val_loss: 0.8586 - val_accuracy: 0.8803
Epoch 80/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1156 - accuracy: 0.9571 - val_loss: 0.9029 - val_accuracy: 0.8763
Epoch 81/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1135 - accuracy: 0.9573 - val_loss: 0.9037 - val_accuracy: 0.8754
Epoch 82/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1162 - accuracy: 0.9572 - val_loss: 0.8951 - val_accuracy: 0.8793
Epoch 83/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1150 - accuracy: 0.9570 - val_loss: 0.8858 - val_accuracy: 0.8764
Epoch 84/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1124 - accuracy: 0.9582 - val_loss: 0.9166 - val_accuracy: 0.8790
Epoch 85/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1144 - accuracy: 0.9578 - val_loss: 0.9857 - val_accuracy: 0.8765
Epoch 86/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1131 - accuracy: 0.9580 - val_loss: 0.9105 - val_accuracy: 0.8806
Epoch 87/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1113 - accuracy: 0.9595 - val_loss: 0.9353 - val_accuracy: 0.8822
Epoch 88/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1110 - accuracy: 0.9592 - val_loss: 1.0114 - val_accuracy: 0.8787
Epoch 89/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1098 - accuracy: 0.9602 - val_loss: 0.9992 - val_accuracy: 0.8792
Epoch 90/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1114 - accuracy: 0.9597 - val_loss: 1.0678 - val_accuracy: 0.8759
Epoch 91/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1124 - accuracy: 0.9593 - val_loss: 0.9888 - val_accuracy: 0.8771
Epoch 92/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1091 - accuracy: 0.9592 - val_loss: 0.9495 - val_accuracy: 0.8776
Epoch 93/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1131 - accuracy: 0.9589 - val_loss: 0.9689 - val_accuracy: 0.8775
Epoch 94/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1085 - accuracy: 0.9597 - val_loss: 0.9723 - val_accuracy: 0.8654
Epoch 95/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1098 - accuracy: 0.9595 - val_loss: 1.0262 - val_accuracy: 0.8748
Epoch 96/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1081 - accuracy: 0.9604 - val_loss: 0.9956 - val_accuracy: 0.8795
Epoch 97/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1090 - accuracy: 0.9606 - val_loss: 0.9761 - val_accuracy: 0.8786
Epoch 98/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1071 - accuracy: 0.9614 - val_loss: 0.9407 - val_accuracy: 0.8765
Epoch 99/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1058 - accuracy: 0.9616 - val_loss: 1.0479 - val_accuracy: 0.8821
Epoch 100/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1063 - accuracy: 0.9616 - val_loss: 1.0027 - val_accuracy: 0.8826
313/313 [==============================] - 1s 3ms/step - loss: 1.0027 - accuracy: 0.8826
313/313 [==============================] - 1s 2ms/step
Adding drop out does improve performance

Instead of computing the gradients over the entire dataset, it performs a parameter update for each example in the dataset.The problem of SGD is that the updates are frequent and with a high variance, so the objective function heavily fluctuates during training.This fluctuation can be an advantage with respect to batch gradient descent because it allows the function to jump to better local minima, but at the same time it can represent a disadvantage with respect to the convergence in a specific local minima.
def model_sgd():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.SGD(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_sgd(),X_train, y_train)
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_3 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_3 (MaxPooling (None, 13, 13, 32) 0
2D)
conv2d_4 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_4 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_5 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_5 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten_1 (Flatten) (None, 128) 0
dense_2 (Dense) (None, 128) 16512
dropout_1 (Dropout) (None, 128) 0
dense_3 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 87.317
> 87.708
> 88.100
> 87.217
> 88.175
Accuracy: mean=87.703 std=0.391, n=5
It adapts the learning rate to the parameters performing small updates for frequently occurring features and large updates for the rarest ones.In this way, the network is able to capture information belonging to features that are not frequent, putting them in evidence and giving them the right weight.The problem of Adagrad is that it adjusts the learning rate for each parameter according to all the past gradients. So, the possibility of having a very small learning rate after a high number of steps — resulting from the accumulation of all the past gradients — is relevant.If the learning rate is too much small, we simply can’t update weights and the consequence is that the network doesn’t learn anymore.
def model_adagrad():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adagrad(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_adagrad(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 79.367
> 82.325
> 83.592
> 84.300
> 84.642
Accuracy: mean=82.845 std=1.912, n=5
It improves the previous algorithm by introducing a history window which sets a fixed number of past gradients to take in consideration during the training.In this way, we don’t have the problem of the vanishing learning rate.
def model_adadelta():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_adadelta(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 72.075
> 75.308
> 76.958
> 78.617
> 79.983
Accuracy: mean=76.588 std=2.749, n=5
It is very similar to Adadelta. The only difference is in the way they manage the past gradients.
def model_rmsprop():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.RMSprop(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_rmsprop(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 89.233
> 88.200
> 85.583
> 86.908
> 86.917
Accuracy: mean=87.368 std=1.247, n=5
It adds to the advantages of Adadelta and RMSprop, the storing of an exponentially decaying average of past gradients similar to momentum.
def model_adam():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_adam(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 89.125
> 88.925
> 89.175
> 89.350
> 88.658
Accuracy: mean=89.047 std=0.237, n=5
reduced learning rate on plateau is added and batch size is increased to hasten the compute time and also more epochs with early stopping to see performance of models.

reducing learning rate as val_loss plateaus allows optimizer to more efficiently find minimum in loss surface
from keras.callbacks import ReduceLROnPlateau
### function for looping through all the models
def tune_model_act_opt(act,opt):
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation=act,
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation=act))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation=act))
model.add(BatchNormalization())
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation=act))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))
# compile model
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt,metrics=['accuracy'])
return model
# plot diagnostic learning curves
def summarize_diagnostics(histories,act,opt):
# plot loss
pyplot.subplot(211)
pyplot.title(f'Cross Entropy Loss {act},{opt}')
pyplot.plot(histories.history['loss'], color='blue', label='train')
pyplot.plot(histories.history['val_loss'], color='orange', label='test')
# plot accuracy
pyplot.subplot(212)
pyplot.title(f'Classification Accuracy {act},{opt}')
pyplot.plot(histories.history['accuracy'], color='blue', label='train')
pyplot.plot(histories.history['val_accuracy'], color='orange', label='test')
pyplot.legend()
pyplot.show()
def run_test_harness_act_opt(act,opt,X_train, y_train, X_test, y_test,X_val,y_val):
model = tune_model_act_opt(act,opt)
# fit model
h_callback = EarlyStopping(monitor='val_accuracy', patience=5)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
history = model.fit(X_train, y_train, epochs=100, batch_size=128,validation_data=(X_val, y_val), verbose=0 ,callbacks=[h_callback,reduce_lr])
# evaluate model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
# ran into memory leakage problem trying to fix with this
gc.collect()
tf.keras.backend.clear_session()
del model
print('Test Accuracy> %.3f' % (test_acc * 100.0))
print('Test Loss> %.3f' % (test_loss * 100.0))
summarize_diagnostics(history,act,opt)
return (test_acc*100, test_loss*100)
from itertools import product
optimizers = ['adam', 'sgd', 'rmsprop', 'adagrad', 'adadelta', 'adamax', 'nadam']
activation = ['relu', 'selu', 'elu', 'tanh',keras.layers.LeakyReLU(alpha=0.01)]
accuracy_act_opt = list()
loss_act_opt = list()
activation_names = ['relu', 'selu', 'elu', 'tanh','LeakyReLU']
act_opt = list(product(activation_names,optimizers))
for act,opt in product(activation,optimizers):
acc,loss = run_test_harness_act_opt(act,opt,X_train, y_train, X_test, y_test,X_val,y_val)
accuracy_act_opt.append(acc)
loss_act_opt.append(loss)
Test Accuracy> 90.410 Test Loss> 25.313
Test Accuracy> 88.930 Test Loss> 30.751
Test Accuracy> 89.960 Test Loss> 26.891
Test Accuracy> 85.750 Test Loss> 39.456
Test Accuracy> 77.720 Test Loss> 60.164
Test Accuracy> 91.010 Test Loss> 25.086
Test Accuracy> 91.300 Test Loss> 24.176
Test Accuracy> 91.540 Test Loss> 23.460
Test Accuracy> 88.510 Test Loss> 31.221
Test Accuracy> 90.280 Test Loss> 26.774
Test Accuracy> 83.910 Test Loss> 45.188
Test Accuracy> 78.020 Test Loss> 61.079
Test Accuracy> 90.850 Test Loss> 24.860
Test Accuracy> 90.550 Test Loss> 26.125
Test Accuracy> 90.030 Test Loss> 26.094
Test Accuracy> 88.610 Test Loss> 31.108
Test Accuracy> 90.940 Test Loss> 25.740
Test Accuracy> 83.240 Test Loss> 45.483
Test Accuracy> 75.500 Test Loss> 66.238
Test Accuracy> 90.610 Test Loss> 25.524
Test Accuracy> 89.700 Test Loss> 27.662
Test Accuracy> 88.780 Test Loss> 31.092
Test Accuracy> 89.550 Test Loss> 29.274
Test Accuracy> 89.630 Test Loss> 30.272
Test Accuracy> 82.770 Test Loss> 47.703
Test Accuracy> 76.430 Test Loss> 66.457
Test Accuracy> 90.590 Test Loss> 26.729
Test Accuracy> 89.280 Test Loss> 29.253
Test Accuracy> 91.660 Test Loss> 23.224
Test Accuracy> 89.920 Test Loss> 27.934
Test Accuracy> 90.720 Test Loss> 26.221
Test Accuracy> 85.410 Test Loss> 39.872
Test Accuracy> 78.640 Test Loss> 57.588
Test Accuracy> 91.190 Test Loss> 24.197
Test Accuracy> 91.370 Test Loss> 23.067
# create a dataframe of scores_act_opt and act_opt
df_act_opt = pd.DataFrame({'act_opt':act_opt,'accuracy':accuracy_act_opt,'loss':loss_act_opt})
# highlight highest score_act_opt in df_act_opt
df_act_opt.style.highlight_max(subset=['accuracy'],color='green', axis=0).highlight_min(subset=['loss'],color='green', axis=0)
# style only score_act_opt in df_act_opt
| act_opt | accuracy | loss | |
|---|---|---|---|
| 0 | ('relu', 'adam') | 90.410000 | 25.313336 |
| 1 | ('relu', 'sgd') | 88.929999 | 30.751297 |
| 2 | ('relu', 'rmsprop') | 89.960003 | 26.891154 |
| 3 | ('relu', 'adagrad') | 85.750002 | 39.455575 |
| 4 | ('relu', 'adadelta') | 77.719998 | 60.163707 |
| 5 | ('relu', 'adamax') | 91.009998 | 25.086260 |
| 6 | ('relu', 'nadam') | 91.299999 | 24.176198 |
| 7 | ('selu', 'adam') | 91.540003 | 23.459524 |
| 8 | ('selu', 'sgd') | 88.510001 | 31.220523 |
| 9 | ('selu', 'rmsprop') | 90.280002 | 26.774284 |
| 10 | ('selu', 'adagrad') | 83.910000 | 45.188287 |
| 11 | ('selu', 'adadelta') | 78.020000 | 61.078596 |
| 12 | ('selu', 'adamax') | 90.850002 | 24.860381 |
| 13 | ('selu', 'nadam') | 90.549999 | 26.124877 |
| 14 | ('elu', 'adam') | 90.030003 | 26.094198 |
| 15 | ('elu', 'sgd') | 88.609999 | 31.108001 |
| 16 | ('elu', 'rmsprop') | 90.939999 | 25.739521 |
| 17 | ('elu', 'adagrad') | 83.240002 | 45.483038 |
| 18 | ('elu', 'adadelta') | 75.500000 | 66.237921 |
| 19 | ('elu', 'adamax') | 90.609998 | 25.524434 |
| 20 | ('elu', 'nadam') | 89.700001 | 27.661842 |
| 21 | ('tanh', 'adam') | 88.779998 | 31.091866 |
| 22 | ('tanh', 'sgd') | 89.550000 | 29.274067 |
| 23 | ('tanh', 'rmsprop') | 89.630002 | 30.272409 |
| 24 | ('tanh', 'adagrad') | 82.770002 | 47.702739 |
| 25 | ('tanh', 'adadelta') | 76.429999 | 66.456974 |
| 26 | ('tanh', 'adamax') | 90.590000 | 26.728883 |
| 27 | ('tanh', 'nadam') | 89.279997 | 29.252899 |
| 28 | ('LeakyReLU', 'adam') | 91.659999 | 23.224306 |
| 29 | ('LeakyReLU', 'sgd') | 89.920002 | 27.933624 |
| 30 | ('LeakyReLU', 'rmsprop') | 90.719998 | 26.220742 |
| 31 | ('LeakyReLU', 'adagrad') | 85.409999 | 39.871824 |
| 32 | ('LeakyReLU', 'adadelta') | 78.640002 | 57.587957 |
| 33 | ('LeakyReLU', 'adamax') | 91.189998 | 24.196951 |
| 34 | ('LeakyReLU', 'nadam') | 91.369998 | 23.066597 |
seed = 1
np.random.seed(seed)
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D((2, 2),padding='same'))
fashion_model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
# fit model
h_callback = EarlyStopping(monitor='val_accuracy', patience=10)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
history = fashion_model.fit(X_train, y_train, epochs=100, batch_size=128,validation_data=(X_val, y_val), verbose=0 ,callbacks=[h_callback,reduce_lr])
# evaluate model
_, acc = fashion_model.evaluate(X_test, y_test, verbose=0)
print('> %.3f' % (acc * 100.0))
summarize_diagnostics(history,act,opt)
# learning curves
summarize_diagnostics(history,act,opt)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 28, 28, 32) 320
leaky_re_lu (LeakyReLU) (None, 28, 28, 32) 0
max_pooling2d (MaxPooling2D (None, 14, 14, 32) 0
)
conv2d_1 (Conv2D) (None, 14, 14, 64) 18496
leaky_re_lu_1 (LeakyReLU) (None, 14, 14, 64) 0
max_pooling2d_1 (MaxPooling (None, 7, 7, 64) 0
2D)
conv2d_2 (Conv2D) (None, 7, 7, 128) 73856
leaky_re_lu_2 (LeakyReLU) (None, 7, 7, 128) 0
max_pooling2d_2 (MaxPooling (None, 4, 4, 128) 0
2D)
flatten (Flatten) (None, 2048) 0
dense (Dense) (None, 128) 262272
leaky_re_lu_3 (LeakyReLU) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 356,234
Trainable params: 356,234
Non-trainable params: 0
_________________________________________________________________
> 91.190

Graph 1. Model with a good fit and high variance. Source: https://www.researchgate.net/publication/332412613
We would like a good fit instead as sometimes there are noisy data that will prevent us from generalizing well, preventing the model from predicting well on real world data
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1(l1=0.05)))
#loop through the optimizers
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_3 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_3 (MaxPooling (None, 13, 13, 32) 0
2D)
dropout (Dropout) (None, 13, 13, 32) 0
conv2d_4 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_4 (MaxPooling (None, 5, 5, 64) 0
2D)
dropout_1 (Dropout) (None, 5, 5, 64) 0
conv2d_5 (Conv2D) (None, 3, 3, 128) 73856
dropout_2 (Dropout) (None, 3, 3, 128) 0
flatten_1 (Flatten) (None, 1152) 0
dense_2 (Dense) (None, 128) 147584
dropout_3 (Dropout) (None, 128) 0
flatten_2 (Flatten) (None, 128) 0
dense_3 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
4500/4500 [==============================] - 23s 5ms/step - loss: 1.2245 - accuracy: 0.6577 - val_loss: 0.6385 - val_accuracy: 0.8151 - lr: 0.0010
Epoch 2/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.8505 - accuracy: 0.7447 - val_loss: 0.5347 - val_accuracy: 0.8587 - lr: 0.0010
Epoch 3/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.7715 - accuracy: 0.7771 - val_loss: 0.4971 - val_accuracy: 0.8716 - lr: 0.0010
Epoch 4/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.7192 - accuracy: 0.7957 - val_loss: 0.4878 - val_accuracy: 0.8777 - lr: 0.0010
Epoch 5/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6920 - accuracy: 0.8045 - val_loss: 0.4536 - val_accuracy: 0.8882 - lr: 0.0010
Epoch 6/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6716 - accuracy: 0.8128 - val_loss: 0.4632 - val_accuracy: 0.8854 - lr: 0.0010
Epoch 7/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6525 - accuracy: 0.8194 - val_loss: 0.4362 - val_accuracy: 0.8928 - lr: 0.0010
Epoch 8/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6430 - accuracy: 0.8229 - val_loss: 0.4370 - val_accuracy: 0.8940 - lr: 0.0010
Epoch 9/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6327 - accuracy: 0.8263 - val_loss: 0.4219 - val_accuracy: 0.9000 - lr: 0.0010
Epoch 10/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6246 - accuracy: 0.8277 - val_loss: 0.4273 - val_accuracy: 0.8990 - lr: 0.0010
Epoch 11/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6166 - accuracy: 0.8322 - val_loss: 0.4097 - val_accuracy: 0.9003 - lr: 0.0010
Epoch 12/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6102 - accuracy: 0.8332 - val_loss: 0.4505 - val_accuracy: 0.8942 - lr: 0.0010
Epoch 13/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6011 - accuracy: 0.8375 - val_loss: 0.4266 - val_accuracy: 0.8978 - lr: 0.0010
Epoch 14/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6009 - accuracy: 0.8367 - val_loss: 0.4181 - val_accuracy: 0.8991 - lr: 0.0010
Epoch 15/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5988 - accuracy: 0.8376 - val_loss: 0.4127 - val_accuracy: 0.9062 - lr: 0.0010
Epoch 16/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5917 - accuracy: 0.8397 - val_loss: 0.4117 - val_accuracy: 0.8928 - lr: 0.0010
Epoch 17/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5897 - accuracy: 0.8404 - val_loss: 0.4300 - val_accuracy: 0.9002 - lr: 0.0010
Epoch 18/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5888 - accuracy: 0.8400 - val_loss: 0.4194 - val_accuracy: 0.8957 - lr: 0.0010
Epoch 19/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5839 - accuracy: 0.8435 - val_loss: 0.4061 - val_accuracy: 0.8917 - lr: 0.0010
Epoch 20/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5799 - accuracy: 0.8432 - val_loss: 0.4016 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 21/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5772 - accuracy: 0.8435 - val_loss: 0.4018 - val_accuracy: 0.8995 - lr: 0.0010
Epoch 22/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5774 - accuracy: 0.8444 - val_loss: 0.3987 - val_accuracy: 0.9029 - lr: 0.0010
Epoch 23/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5716 - accuracy: 0.8456 - val_loss: 0.4006 - val_accuracy: 0.9042 - lr: 0.0010
Epoch 24/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5734 - accuracy: 0.8454 - val_loss: 0.3878 - val_accuracy: 0.9087 - lr: 0.0010
Epoch 25/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5677 - accuracy: 0.8472 - val_loss: 0.3962 - val_accuracy: 0.9033 - lr: 0.0010
Epoch 26/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5659 - accuracy: 0.8473 - val_loss: 0.3915 - val_accuracy: 0.9071 - lr: 0.0010
Epoch 27/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5666 - accuracy: 0.8483 - val_loss: 0.3937 - val_accuracy: 0.9072 - lr: 0.0010
Epoch 28/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5641 - accuracy: 0.8487 - val_loss: 0.4084 - val_accuracy: 0.8978 - lr: 0.0010
Epoch 29/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5636 - accuracy: 0.8482 - val_loss: 0.3853 - val_accuracy: 0.9079 - lr: 0.0010
Epoch 30/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5619 - accuracy: 0.8490 - val_loss: 0.3867 - val_accuracy: 0.9047 - lr: 0.0010
Epoch 31/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5604 - accuracy: 0.8502 - val_loss: 0.3941 - val_accuracy: 0.9056 - lr: 0.0010
Epoch 32/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5578 - accuracy: 0.8507 - val_loss: 0.3861 - val_accuracy: 0.9079 - lr: 0.0010
Epoch 33/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5589 - accuracy: 0.8514 - val_loss: 0.4041 - val_accuracy: 0.9046 - lr: 0.0010
Epoch 34/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5571 - accuracy: 0.8509 - val_loss: 0.3803 - val_accuracy: 0.9077 - lr: 0.0010
Epoch 35/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5569 - accuracy: 0.8515 - val_loss: 0.3711 - val_accuracy: 0.9100 - lr: 0.0010
Epoch 36/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5543 - accuracy: 0.8515 - val_loss: 0.4088 - val_accuracy: 0.8982 - lr: 0.0010
Epoch 37/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5563 - accuracy: 0.8516 - val_loss: 0.3978 - val_accuracy: 0.9067 - lr: 0.0010
Epoch 38/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5495 - accuracy: 0.8527 - val_loss: 0.3745 - val_accuracy: 0.9078 - lr: 0.0010
Epoch 39/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5537 - accuracy: 0.8517 - val_loss: 0.3948 - val_accuracy: 0.9103 - lr: 0.0010
Epoch 40/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5522 - accuracy: 0.8521 - val_loss: 0.3668 - val_accuracy: 0.9132 - lr: 0.0010
Epoch 41/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5490 - accuracy: 0.8542 - val_loss: 0.3894 - val_accuracy: 0.9116 - lr: 0.0010
Epoch 42/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5482 - accuracy: 0.8550 - val_loss: 0.3775 - val_accuracy: 0.9119 - lr: 0.0010
Epoch 43/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5481 - accuracy: 0.8545 - val_loss: 0.3760 - val_accuracy: 0.9091 - lr: 0.0010
Epoch 44/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5460 - accuracy: 0.8529 - val_loss: 0.3731 - val_accuracy: 0.9098 - lr: 0.0010
Epoch 45/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5477 - accuracy: 0.8537 - val_loss: 0.3663 - val_accuracy: 0.9139 - lr: 0.0010
Epoch 46/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5468 - accuracy: 0.8535 - val_loss: 0.3807 - val_accuracy: 0.9099 - lr: 0.0010
Epoch 47/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5493 - accuracy: 0.8546 - val_loss: 0.3806 - val_accuracy: 0.9057 - lr: 0.0010
Epoch 48/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5442 - accuracy: 0.8555 - val_loss: 0.3717 - val_accuracy: 0.9119 - lr: 0.0010
Epoch 49/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5423 - accuracy: 0.8559 - val_loss: 0.3916 - val_accuracy: 0.9125 - lr: 0.0010
Epoch 50/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5443 - accuracy: 0.8543 - val_loss: 0.3700 - val_accuracy: 0.9143 - lr: 0.0010
Epoch 51/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5412 - accuracy: 0.8567 - val_loss: 0.3838 - val_accuracy: 0.9062 - lr: 0.0010
Epoch 52/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5432 - accuracy: 0.8568 - val_loss: 0.3584 - val_accuracy: 0.9129 - lr: 0.0010
Epoch 53/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5428 - accuracy: 0.8562 - val_loss: 0.3792 - val_accuracy: 0.9119 - lr: 0.0010
Epoch 54/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5438 - accuracy: 0.8557 - val_loss: 0.3832 - val_accuracy: 0.9102 - lr: 0.0010
Epoch 55/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5403 - accuracy: 0.8566 - val_loss: 0.3830 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 56/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5393 - accuracy: 0.8563 - val_loss: 0.3655 - val_accuracy: 0.9135 - lr: 0.0010
Epoch 57/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5387 - accuracy: 0.8570 - val_loss: 0.3595 - val_accuracy: 0.9130 - lr: 0.0010
Epoch 58/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5386 - accuracy: 0.8578 - val_loss: 0.3875 - val_accuracy: 0.9088 - lr: 0.0010
Epoch 59/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5363 - accuracy: 0.8582 - val_loss: 0.4238 - val_accuracy: 0.8882 - lr: 0.0010
Epoch 60/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5388 - accuracy: 0.8562 - val_loss: 0.3977 - val_accuracy: 0.9139 - lr: 0.0010
Epoch 61/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5389 - accuracy: 0.8568 - val_loss: 0.4012 - val_accuracy: 0.9146 - lr: 0.0010
Epoch 62/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5386 - accuracy: 0.8570 - val_loss: 0.3854 - val_accuracy: 0.9101 - lr: 0.0010
Epoch 62: early stopping
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L2(l2=0.05)))
#loop through the optimizers
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_6 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_5 (MaxPooling (None, 13, 13, 32) 0
2D)
dropout_4 (Dropout) (None, 13, 13, 32) 0
conv2d_7 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_6 (MaxPooling (None, 5, 5, 64) 0
2D)
dropout_5 (Dropout) (None, 5, 5, 64) 0
conv2d_8 (Conv2D) (None, 3, 3, 128) 73856
dropout_6 (Dropout) (None, 3, 3, 128) 0
flatten_3 (Flatten) (None, 1152) 0
dense_4 (Dense) (None, 128) 147584
dropout_7 (Dropout) (None, 128) 0
flatten_4 (Flatten) (None, 128) 0
dense_5 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.8948 - accuracy: 0.6929 - val_loss: 0.4543 - val_accuracy: 0.8413 - lr: 0.0010
Epoch 2/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6289 - accuracy: 0.7812 - val_loss: 0.3786 - val_accuracy: 0.8742 - lr: 0.0010
Epoch 3/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5638 - accuracy: 0.8067 - val_loss: 0.3533 - val_accuracy: 0.8786 - lr: 0.0010
Epoch 4/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5308 - accuracy: 0.8189 - val_loss: 0.3408 - val_accuracy: 0.8887 - lr: 0.0010
Epoch 5/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5063 - accuracy: 0.8271 - val_loss: 0.3122 - val_accuracy: 0.8945 - lr: 0.0010
Epoch 6/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4927 - accuracy: 0.8308 - val_loss: 0.3209 - val_accuracy: 0.8898 - lr: 0.0010
Epoch 7/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4822 - accuracy: 0.8344 - val_loss: 0.3219 - val_accuracy: 0.8885 - lr: 0.0010
Epoch 8/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4693 - accuracy: 0.8395 - val_loss: 0.3030 - val_accuracy: 0.8963 - lr: 0.0010
Epoch 9/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4633 - accuracy: 0.8418 - val_loss: 0.2862 - val_accuracy: 0.9042 - lr: 0.0010
Epoch 10/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4583 - accuracy: 0.8426 - val_loss: 0.2901 - val_accuracy: 0.9008 - lr: 0.0010
Epoch 11/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4502 - accuracy: 0.8458 - val_loss: 0.2929 - val_accuracy: 0.9008 - lr: 0.0010
Epoch 12/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4484 - accuracy: 0.8464 - val_loss: 0.2810 - val_accuracy: 0.9068 - lr: 0.0010
Epoch 13/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4424 - accuracy: 0.8489 - val_loss: 0.2961 - val_accuracy: 0.9028 - lr: 0.0010
Epoch 14/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4425 - accuracy: 0.8489 - val_loss: 0.2963 - val_accuracy: 0.9008 - lr: 0.0010
Epoch 15/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4390 - accuracy: 0.8499 - val_loss: 0.2915 - val_accuracy: 0.8973 - lr: 0.0010
Epoch 16/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4351 - accuracy: 0.8513 - val_loss: 0.2774 - val_accuracy: 0.9096 - lr: 0.0010
Epoch 17/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4348 - accuracy: 0.8520 - val_loss: 0.2764 - val_accuracy: 0.9086 - lr: 0.0010
Epoch 18/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4297 - accuracy: 0.8523 - val_loss: 0.2863 - val_accuracy: 0.9042 - lr: 0.0010
Epoch 19/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4253 - accuracy: 0.8555 - val_loss: 0.2862 - val_accuracy: 0.9066 - lr: 0.0010
Epoch 20/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4238 - accuracy: 0.8548 - val_loss: 0.2812 - val_accuracy: 0.9064 - lr: 0.0010
Epoch 21/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4225 - accuracy: 0.8544 - val_loss: 0.2672 - val_accuracy: 0.9110 - lr: 0.0010
Epoch 22/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4217 - accuracy: 0.8547 - val_loss: 0.2749 - val_accuracy: 0.9097 - lr: 0.0010
Epoch 23/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4216 - accuracy: 0.8559 - val_loss: 0.2698 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 24/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4196 - accuracy: 0.8565 - val_loss: 0.2761 - val_accuracy: 0.9057 - lr: 0.0010
Epoch 25/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4198 - accuracy: 0.8564 - val_loss: 0.2659 - val_accuracy: 0.9104 - lr: 0.0010
Epoch 26/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4165 - accuracy: 0.8579 - val_loss: 0.2788 - val_accuracy: 0.9069 - lr: 0.0010
Epoch 27/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4156 - accuracy: 0.8568 - val_loss: 0.2775 - val_accuracy: 0.9040 - lr: 0.0010
Epoch 28/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4149 - accuracy: 0.8583 - val_loss: 0.2693 - val_accuracy: 0.9115 - lr: 0.0010
Epoch 29/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4111 - accuracy: 0.8599 - val_loss: 0.2810 - val_accuracy: 0.9061 - lr: 0.0010
Epoch 30/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4122 - accuracy: 0.8599 - val_loss: 0.2762 - val_accuracy: 0.9083 - lr: 0.0010
Epoch 31/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4111 - accuracy: 0.8599 - val_loss: 0.2711 - val_accuracy: 0.9080 - lr: 0.0010
Epoch 32/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4097 - accuracy: 0.8597 - val_loss: 0.2725 - val_accuracy: 0.9097 - lr: 0.0010
Epoch 33/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4081 - accuracy: 0.8601 - val_loss: 0.2844 - val_accuracy: 0.9018 - lr: 0.0010
Epoch 34/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4108 - accuracy: 0.8594 - val_loss: 0.2729 - val_accuracy: 0.9114 - lr: 0.0010
Epoch 35/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4089 - accuracy: 0.8598 - val_loss: 0.2667 - val_accuracy: 0.9089 - lr: 0.0010
Epoch 35: early stopping
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1L2(l1=0.01, l2=0.01)))
#loop through the optimizers
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_9 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_7 (MaxPooling (None, 13, 13, 32) 0
2D)
dropout_8 (Dropout) (None, 13, 13, 32) 0
conv2d_10 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_8 (MaxPooling (None, 5, 5, 64) 0
2D)
dropout_9 (Dropout) (None, 5, 5, 64) 0
conv2d_11 (Conv2D) (None, 3, 3, 128) 73856
dropout_10 (Dropout) (None, 3, 3, 128) 0
flatten_5 (Flatten) (None, 1152) 0
dense_6 (Dense) (None, 128) 147584
dropout_11 (Dropout) (None, 128) 0
flatten_6 (Flatten) (None, 128) 0
dense_7 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
4500/4500 [==============================] - 23s 5ms/step - loss: 1.0054 - accuracy: 0.6848 - val_loss: 0.5433 - val_accuracy: 0.8312 - lr: 0.0010
Epoch 2/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.7101 - accuracy: 0.7714 - val_loss: 0.4733 - val_accuracy: 0.8540 - lr: 0.0010
Epoch 3/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6409 - accuracy: 0.7981 - val_loss: 0.4139 - val_accuracy: 0.8746 - lr: 0.0010
Epoch 4/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.6052 - accuracy: 0.8094 - val_loss: 0.3697 - val_accuracy: 0.8903 - lr: 0.0010
Epoch 5/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5810 - accuracy: 0.8182 - val_loss: 0.4180 - val_accuracy: 0.8699 - lr: 0.0010
Epoch 6/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5640 - accuracy: 0.8242 - val_loss: 0.3849 - val_accuracy: 0.8916 - lr: 0.0010
Epoch 7/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5489 - accuracy: 0.8299 - val_loss: 0.3725 - val_accuracy: 0.8910 - lr: 0.0010
Epoch 8/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5397 - accuracy: 0.8333 - val_loss: 0.3675 - val_accuracy: 0.8940 - lr: 0.0010
Epoch 9/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5281 - accuracy: 0.8375 - val_loss: 0.3363 - val_accuracy: 0.9022 - lr: 0.0010
Epoch 10/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5241 - accuracy: 0.8374 - val_loss: 0.3684 - val_accuracy: 0.8939 - lr: 0.0010
Epoch 11/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5172 - accuracy: 0.8415 - val_loss: 0.3494 - val_accuracy: 0.9012 - lr: 0.0010
Epoch 12/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5129 - accuracy: 0.8419 - val_loss: 0.3404 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 13/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5082 - accuracy: 0.8430 - val_loss: 0.3483 - val_accuracy: 0.9013 - lr: 0.0010
Epoch 14/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5043 - accuracy: 0.8447 - val_loss: 0.3381 - val_accuracy: 0.9034 - lr: 0.0010
Epoch 15/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5025 - accuracy: 0.8460 - val_loss: 0.3428 - val_accuracy: 0.9023 - lr: 0.0010
Epoch 16/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.5003 - accuracy: 0.8467 - val_loss: 0.3469 - val_accuracy: 0.9004 - lr: 0.0010
Epoch 17/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4969 - accuracy: 0.8473 - val_loss: 0.3361 - val_accuracy: 0.9050 - lr: 0.0010
Epoch 18/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4912 - accuracy: 0.8496 - val_loss: 0.3299 - val_accuracy: 0.9057 - lr: 0.0010
Epoch 19/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4895 - accuracy: 0.8500 - val_loss: 0.3347 - val_accuracy: 0.9057 - lr: 0.0010
Epoch 20/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4889 - accuracy: 0.8495 - val_loss: 0.3152 - val_accuracy: 0.9090 - lr: 0.0010
Epoch 21/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.4867 - accuracy: 0.8508 - val_loss: 0.3252 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 22/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4850 - accuracy: 0.8509 - val_loss: 0.3211 - val_accuracy: 0.9084 - lr: 0.0010
Epoch 23/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4806 - accuracy: 0.8532 - val_loss: 0.3158 - val_accuracy: 0.9095 - lr: 0.0010
Epoch 24/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4787 - accuracy: 0.8538 - val_loss: 0.3371 - val_accuracy: 0.9057 - lr: 0.0010
Epoch 25/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4790 - accuracy: 0.8543 - val_loss: 0.3131 - val_accuracy: 0.9102 - lr: 0.0010
Epoch 26/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4775 - accuracy: 0.8542 - val_loss: 0.3267 - val_accuracy: 0.9058 - lr: 0.0010
Epoch 27/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4729 - accuracy: 0.8555 - val_loss: 0.3223 - val_accuracy: 0.9108 - lr: 0.0010
Epoch 28/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4724 - accuracy: 0.8557 - val_loss: 0.3311 - val_accuracy: 0.9036 - lr: 0.0010
Epoch 29/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4729 - accuracy: 0.8554 - val_loss: 0.3186 - val_accuracy: 0.9118 - lr: 0.0010
Epoch 30/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4740 - accuracy: 0.8556 - val_loss: 0.3206 - val_accuracy: 0.9120 - lr: 0.0010
Epoch 31/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4690 - accuracy: 0.8563 - val_loss: 0.3172 - val_accuracy: 0.9068 - lr: 0.0010
Epoch 32/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4690 - accuracy: 0.8565 - val_loss: 0.3171 - val_accuracy: 0.9100 - lr: 0.0010
Epoch 33/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4672 - accuracy: 0.8571 - val_loss: 0.3369 - val_accuracy: 0.9031 - lr: 0.0010
Epoch 34/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4678 - accuracy: 0.8575 - val_loss: 0.3131 - val_accuracy: 0.9109 - lr: 0.0010
Epoch 35/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.4664 - accuracy: 0.8585 - val_loss: 0.3107 - val_accuracy: 0.9151 - lr: 0.0010
Epoch 36/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4642 - accuracy: 0.8577 - val_loss: 0.3092 - val_accuracy: 0.9131 - lr: 0.0010
Epoch 37/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4653 - accuracy: 0.8584 - val_loss: 0.3248 - val_accuracy: 0.9065 - lr: 0.0010
Epoch 38/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4640 - accuracy: 0.8582 - val_loss: 0.3165 - val_accuracy: 0.9124 - lr: 0.0010
Epoch 39/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4633 - accuracy: 0.8588 - val_loss: 0.3230 - val_accuracy: 0.9093 - lr: 0.0010
Epoch 40/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4647 - accuracy: 0.8580 - val_loss: 0.3075 - val_accuracy: 0.9110 - lr: 0.0010
Epoch 41/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4605 - accuracy: 0.8595 - val_loss: 0.3128 - val_accuracy: 0.9147 - lr: 0.0010
Epoch 42/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4621 - accuracy: 0.8581 - val_loss: 0.3128 - val_accuracy: 0.9144 - lr: 0.0010
Epoch 43/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4584 - accuracy: 0.8592 - val_loss: 0.3159 - val_accuracy: 0.9155 - lr: 0.0010
Epoch 44/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4605 - accuracy: 0.8594 - val_loss: 0.3087 - val_accuracy: 0.9134 - lr: 0.0010
Epoch 45/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4588 - accuracy: 0.8598 - val_loss: 0.3156 - val_accuracy: 0.9148 - lr: 0.0010
Epoch 46/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.4560 - accuracy: 0.8606 - val_loss: 0.3202 - val_accuracy: 0.9110 - lr: 0.0010
Epoch 47/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4580 - accuracy: 0.8613 - val_loss: 0.3173 - val_accuracy: 0.9115 - lr: 0.0010
Epoch 48/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4545 - accuracy: 0.8617 - val_loss: 0.3116 - val_accuracy: 0.9147 - lr: 0.0010
Epoch 49/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4559 - accuracy: 0.8621 - val_loss: 0.3417 - val_accuracy: 0.9008 - lr: 0.0010
Epoch 50/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4569 - accuracy: 0.8608 - val_loss: 0.3218 - val_accuracy: 0.9128 - lr: 0.0010
Epoch 50: early stopping

from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# fit model
h_callback = model.fit(X_train, y_train, epochs = 10,
validation_data=(X_val, y_val))
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
precision,recall,f1 = get_metrics(model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Machine Learning Mastery model',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
Epoch 1/10 4500/4500 [==============================] - 16s 4ms/step - loss: 0.6277 - accuracy: 0.7726 - val_loss: 0.3379 - val_accuracy: 0.8813 Epoch 2/10 4500/4500 [==============================] - 15s 3ms/step - loss: 0.4369 - accuracy: 0.8412 - val_loss: 0.3053 - val_accuracy: 0.8854 Epoch 3/10 4500/4500 [==============================] - 15s 3ms/step - loss: 0.3774 - accuracy: 0.8617 - val_loss: 0.2967 - val_accuracy: 0.8940 Epoch 4/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.3355 - accuracy: 0.8769 - val_loss: 0.2975 - val_accuracy: 0.8965 Epoch 5/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.3020 - accuracy: 0.8889 - val_loss: 0.2821 - val_accuracy: 0.9032 Epoch 6/10 4500/4500 [==============================] - 16s 4ms/step - loss: 0.2760 - accuracy: 0.8968 - val_loss: 0.2763 - val_accuracy: 0.9070 Epoch 7/10 4500/4500 [==============================] - 16s 4ms/step - loss: 0.2506 - accuracy: 0.9069 - val_loss: 0.2841 - val_accuracy: 0.9038 Epoch 8/10 4500/4500 [==============================] - 16s 4ms/step - loss: 0.2284 - accuracy: 0.9150 - val_loss: 0.3181 - val_accuracy: 0.8982 Epoch 9/10 4500/4500 [==============================] - 15s 3ms/step - loss: 0.2083 - accuracy: 0.9225 - val_loss: 0.3067 - val_accuracy: 0.9016 Epoch 10/10 4500/4500 [==============================] - 15s 3ms/step - loss: 0.1885 - accuracy: 0.9295 - val_loss: 0.3339 - val_accuracy: 0.8992
313/313 [==============================] - 0s 1ms/step
pred = model.predict(X_test)
pred = np.argmax(pred, axis=1)
classifation_matrix = confusion_matrix(y_test_label, pred)
# plot confusion matrix
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix, annot=True, fmt="d")
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
gc.collect()
tf.keras.backend.clear_session()
del model
313/313 [==============================] - 0s 1ms/step
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
final_model = Sequential()
final_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',kernel_initializer='he_normal',input_shape=(28,28,1)))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(MaxPooling2D((2, 2)))
final_model.add(BatchNormalization())
final_model.add(Dropout(0.25))
final_model.add(Conv2D(64, (5, 5), activation='linear'))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(BatchNormalization())
final_model.add(MaxPooling2D(pool_size=(3, 3)))
final_model.add(Dropout(0.25))
final_model.add(Conv2D(128, (3, 3), activation='linear'))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(BatchNormalization())
final_model.add(Dropout(0.4))
final_model.add(Flatten())
final_model.add(Dense(128, activation='linear'))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(Dropout(0.3))
final_model.add(Flatten())
final_model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1(0.01)))
final_model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
final_model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = final_model.fit(X_train, y_train, epochs = 200,
validation_data=(X_val, y_val), callbacks=[early_stopping,mc,reduce_lr] , batch_size=64)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
saved_model = load_model('best_model.h5')
# evaluate the model
_, train_acc = saved_model.evaluate(X_train, y_train, verbose=0)
_, test_acc = saved_model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
precision,recall,f1 = get_metrics(saved_model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Fan in final model',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_6 (Conv2D) (None, 26, 26, 32) 320
leaky_re_lu_7 (LeakyReLU) (None, 26, 26, 32) 0
max_pooling2d_4 (MaxPooling (None, 13, 13, 32) 0
2D)
batch_normalization_6 (Batc (None, 13, 13, 32) 128
hNormalization)
dropout_8 (Dropout) (None, 13, 13, 32) 0
conv2d_7 (Conv2D) (None, 9, 9, 64) 51264
leaky_re_lu_8 (LeakyReLU) (None, 9, 9, 64) 0
batch_normalization_7 (Batc (None, 9, 9, 64) 256
hNormalization)
max_pooling2d_5 (MaxPooling (None, 3, 3, 64) 0
2D)
dropout_9 (Dropout) (None, 3, 3, 64) 0
conv2d_8 (Conv2D) (None, 1, 1, 128) 73856
leaky_re_lu_9 (LeakyReLU) (None, 1, 1, 128) 0
batch_normalization_8 (Batc (None, 1, 1, 128) 512
hNormalization)
dropout_10 (Dropout) (None, 1, 1, 128) 0
flatten_4 (Flatten) (None, 128) 0
dense_4 (Dense) (None, 128) 16512
leaky_re_lu_10 (LeakyReLU) (None, 128) 0
dropout_11 (Dropout) (None, 128) 0
flatten_5 (Flatten) (None, 128) 0
dense_5 (Dense) (None, 10) 1290
=================================================================
Total params: 144,138
Trainable params: 143,690
Non-trainable params: 448
_________________________________________________________________
Epoch 1/200
2246/2250 [============================>.] - ETA: 0s - loss: 1.0953 - accuracy: 0.6966
Epoch 1: val_accuracy improved from -inf to 0.85583, saving model to best_model.h5
2250/2250 [==============================] - 18s 8ms/step - loss: 1.0948 - accuracy: 0.6967 - val_loss: 0.5130 - val_accuracy: 0.8558 - lr: 0.0010
Epoch 2/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.7111 - accuracy: 0.7832
Epoch 2: val_accuracy improved from 0.85583 to 0.87625, saving model to best_model.h5
2250/2250 [==============================] - 17s 8ms/step - loss: 0.7112 - accuracy: 0.7832 - val_loss: 0.4427 - val_accuracy: 0.8763 - lr: 0.0010
Epoch 3/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.6411 - accuracy: 0.8028
Epoch 3: val_accuracy did not improve from 0.87625
2250/2250 [==============================] - 17s 7ms/step - loss: 0.6410 - accuracy: 0.8028 - val_loss: 0.4181 - val_accuracy: 0.8756 - lr: 0.0010
Epoch 4/200
2250/2250 [==============================] - ETA: 0s - loss: 0.6014 - accuracy: 0.8146
Epoch 4: val_accuracy improved from 0.87625 to 0.88042, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.6014 - accuracy: 0.8146 - val_loss: 0.4041 - val_accuracy: 0.8804 - lr: 0.0010
Epoch 5/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.5820 - accuracy: 0.8204
Epoch 5: val_accuracy improved from 0.88042 to 0.89208, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5820 - accuracy: 0.8205 - val_loss: 0.3730 - val_accuracy: 0.8921 - lr: 0.0010
Epoch 6/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.5649 - accuracy: 0.8259
Epoch 6: val_accuracy improved from 0.89208 to 0.89600, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5647 - accuracy: 0.8259 - val_loss: 0.3669 - val_accuracy: 0.8960 - lr: 0.0010
Epoch 7/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.5539 - accuracy: 0.8293
Epoch 7: val_accuracy improved from 0.89600 to 0.89908, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5540 - accuracy: 0.8293 - val_loss: 0.3579 - val_accuracy: 0.8991 - lr: 0.0010
Epoch 8/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.5419 - accuracy: 0.8334
Epoch 8: val_accuracy did not improve from 0.89908
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5418 - accuracy: 0.8334 - val_loss: 0.3703 - val_accuracy: 0.8878 - lr: 0.0010
Epoch 9/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.5335 - accuracy: 0.8368
Epoch 9: val_accuracy improved from 0.89908 to 0.90367, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5337 - accuracy: 0.8368 - val_loss: 0.3377 - val_accuracy: 0.9037 - lr: 0.0010
Epoch 10/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.5314 - accuracy: 0.8366
Epoch 10: val_accuracy did not improve from 0.90367
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5316 - accuracy: 0.8365 - val_loss: 0.3523 - val_accuracy: 0.8982 - lr: 0.0010
Epoch 11/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.5216 - accuracy: 0.8397
Epoch 11: val_accuracy improved from 0.90367 to 0.90483, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5216 - accuracy: 0.8397 - val_loss: 0.3341 - val_accuracy: 0.9048 - lr: 0.0010
Epoch 12/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.5179 - accuracy: 0.8421
Epoch 12: val_accuracy did not improve from 0.90483
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5179 - accuracy: 0.8422 - val_loss: 0.3626 - val_accuracy: 0.8979 - lr: 0.0010
Epoch 13/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.5113 - accuracy: 0.8439
Epoch 13: val_accuracy did not improve from 0.90483
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5112 - accuracy: 0.8439 - val_loss: 0.3516 - val_accuracy: 0.8965 - lr: 0.0010
Epoch 14/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.5076 - accuracy: 0.8453
Epoch 14: val_accuracy did not improve from 0.90483
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5077 - accuracy: 0.8453 - val_loss: 0.3445 - val_accuracy: 0.9002 - lr: 0.0010
Epoch 15/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.5041 - accuracy: 0.8466
Epoch 15: val_accuracy improved from 0.90483 to 0.91083, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5041 - accuracy: 0.8466 - val_loss: 0.3218 - val_accuracy: 0.9108 - lr: 0.0010
Epoch 16/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.5007 - accuracy: 0.8474
Epoch 16: val_accuracy did not improve from 0.91083
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5006 - accuracy: 0.8474 - val_loss: 0.3218 - val_accuracy: 0.9098 - lr: 0.0010
Epoch 17/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4984 - accuracy: 0.8488
Epoch 17: val_accuracy did not improve from 0.91083
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4984 - accuracy: 0.8488 - val_loss: 0.3234 - val_accuracy: 0.9088 - lr: 0.0010
Epoch 18/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4966 - accuracy: 0.8483
Epoch 18: val_accuracy did not improve from 0.91083
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4965 - accuracy: 0.8483 - val_loss: 0.3264 - val_accuracy: 0.9089 - lr: 0.0010
Epoch 19/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4946 - accuracy: 0.8488
Epoch 19: val_accuracy did not improve from 0.91083
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4946 - accuracy: 0.8489 - val_loss: 0.3401 - val_accuracy: 0.9075 - lr: 0.0010
Epoch 20/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4895 - accuracy: 0.8511
Epoch 20: val_accuracy did not improve from 0.91083
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4894 - accuracy: 0.8511 - val_loss: 0.3212 - val_accuracy: 0.9090 - lr: 0.0010
Epoch 21/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4857 - accuracy: 0.8518
Epoch 21: val_accuracy did not improve from 0.91083
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4857 - accuracy: 0.8518 - val_loss: 0.3301 - val_accuracy: 0.9056 - lr: 0.0010
Epoch 22/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4855 - accuracy: 0.8521
Epoch 22: val_accuracy improved from 0.91083 to 0.91292, saving model to best_model.h5
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4855 - accuracy: 0.8521 - val_loss: 0.3194 - val_accuracy: 0.9129 - lr: 0.0010
Epoch 23/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4819 - accuracy: 0.8535
Epoch 23: val_accuracy improved from 0.91292 to 0.91383, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4819 - accuracy: 0.8535 - val_loss: 0.3202 - val_accuracy: 0.9138 - lr: 0.0010
Epoch 24/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4798 - accuracy: 0.8546
Epoch 24: val_accuracy did not improve from 0.91383
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4799 - accuracy: 0.8546 - val_loss: 0.3212 - val_accuracy: 0.9104 - lr: 0.0010
Epoch 25/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4804 - accuracy: 0.8533
Epoch 25: val_accuracy did not improve from 0.91383
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4804 - accuracy: 0.8533 - val_loss: 0.3149 - val_accuracy: 0.9093 - lr: 0.0010
Epoch 26/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4774 - accuracy: 0.8543
Epoch 26: val_accuracy did not improve from 0.91383
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4775 - accuracy: 0.8543 - val_loss: 0.3122 - val_accuracy: 0.9137 - lr: 0.0010
Epoch 27/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4757 - accuracy: 0.8556
Epoch 27: val_accuracy improved from 0.91383 to 0.91400, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4758 - accuracy: 0.8555 - val_loss: 0.3092 - val_accuracy: 0.9140 - lr: 0.0010
Epoch 28/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4723 - accuracy: 0.8565
Epoch 28: val_accuracy did not improve from 0.91400
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4723 - accuracy: 0.8565 - val_loss: 0.3068 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 29/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4729 - accuracy: 0.8571
Epoch 29: val_accuracy did not improve from 0.91400
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4730 - accuracy: 0.8571 - val_loss: 0.3273 - val_accuracy: 0.9076 - lr: 0.0010
Epoch 30/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4702 - accuracy: 0.8580
Epoch 30: val_accuracy did not improve from 0.91400
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4702 - accuracy: 0.8580 - val_loss: 0.3125 - val_accuracy: 0.9121 - lr: 0.0010
Epoch 31/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4690 - accuracy: 0.8578
Epoch 31: val_accuracy did not improve from 0.91400
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4690 - accuracy: 0.8578 - val_loss: 0.3173 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 32/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4647 - accuracy: 0.8594
Epoch 32: val_accuracy improved from 0.91400 to 0.91442, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4648 - accuracy: 0.8593 - val_loss: 0.3097 - val_accuracy: 0.9144 - lr: 0.0010
Epoch 33/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4660 - accuracy: 0.8582
Epoch 33: val_accuracy did not improve from 0.91442
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4660 - accuracy: 0.8582 - val_loss: 0.3446 - val_accuracy: 0.9053 - lr: 0.0010
Epoch 34/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4641 - accuracy: 0.8593
Epoch 34: val_accuracy did not improve from 0.91442
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4642 - accuracy: 0.8593 - val_loss: 0.3219 - val_accuracy: 0.9077 - lr: 0.0010
Epoch 35/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4633 - accuracy: 0.8594
Epoch 35: val_accuracy did not improve from 0.91442
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4633 - accuracy: 0.8594 - val_loss: 0.3086 - val_accuracy: 0.9141 - lr: 0.0010
Epoch 36/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4616 - accuracy: 0.8608
Epoch 36: val_accuracy did not improve from 0.91442
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4616 - accuracy: 0.8608 - val_loss: 0.3159 - val_accuracy: 0.9076 - lr: 0.0010
Epoch 37/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4620 - accuracy: 0.8593
Epoch 37: val_accuracy improved from 0.91442 to 0.91608, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4620 - accuracy: 0.8593 - val_loss: 0.3106 - val_accuracy: 0.9161 - lr: 0.0010
Epoch 38/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4624 - accuracy: 0.8597
Epoch 38: val_accuracy did not improve from 0.91608
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4624 - accuracy: 0.8597 - val_loss: 0.3100 - val_accuracy: 0.9160 - lr: 0.0010
Epoch 39/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4625 - accuracy: 0.8609
Epoch 39: val_accuracy improved from 0.91608 to 0.91817, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4625 - accuracy: 0.8609 - val_loss: 0.3022 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 40/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4600 - accuracy: 0.8606
Epoch 40: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4600 - accuracy: 0.8606 - val_loss: 0.3021 - val_accuracy: 0.9152 - lr: 0.0010
Epoch 41/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4589 - accuracy: 0.8617
Epoch 41: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4589 - accuracy: 0.8617 - val_loss: 0.3054 - val_accuracy: 0.9163 - lr: 0.0010
Epoch 42/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4541 - accuracy: 0.8627
Epoch 42: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4540 - accuracy: 0.8627 - val_loss: 0.3150 - val_accuracy: 0.9129 - lr: 0.0010
Epoch 43/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4564 - accuracy: 0.8622
Epoch 43: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4564 - accuracy: 0.8622 - val_loss: 0.3128 - val_accuracy: 0.9139 - lr: 0.0010
Epoch 44/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4557 - accuracy: 0.8628
Epoch 44: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4557 - accuracy: 0.8628 - val_loss: 0.3096 - val_accuracy: 0.9158 - lr: 0.0010
Epoch 45/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4533 - accuracy: 0.8627
Epoch 45: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4532 - accuracy: 0.8627 - val_loss: 0.3253 - val_accuracy: 0.9059 - lr: 0.0010
Epoch 46/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4519 - accuracy: 0.8636
Epoch 46: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4518 - accuracy: 0.8636 - val_loss: 0.3089 - val_accuracy: 0.9173 - lr: 0.0010
Epoch 47/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4509 - accuracy: 0.8638
Epoch 47: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4509 - accuracy: 0.8638 - val_loss: 0.2981 - val_accuracy: 0.9161 - lr: 0.0010
Epoch 48/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4507 - accuracy: 0.8636
Epoch 48: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4507 - accuracy: 0.8636 - val_loss: 0.3051 - val_accuracy: 0.9167 - lr: 0.0010
Epoch 49/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4548 - accuracy: 0.8623
Epoch 49: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4547 - accuracy: 0.8623 - val_loss: 0.3099 - val_accuracy: 0.9120 - lr: 0.0010
Epoch 50/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4521 - accuracy: 0.8635
Epoch 50: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4522 - accuracy: 0.8635 - val_loss: 0.3108 - val_accuracy: 0.9121 - lr: 0.0010
Epoch 51/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4514 - accuracy: 0.8633
Epoch 51: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4512 - accuracy: 0.8633 - val_loss: 0.3171 - val_accuracy: 0.9088 - lr: 0.0010
Epoch 52/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4480 - accuracy: 0.8646
Epoch 52: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4480 - accuracy: 0.8646 - val_loss: 0.3192 - val_accuracy: 0.9126 - lr: 0.0010
Epoch 53/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4488 - accuracy: 0.8643
Epoch 53: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4488 - accuracy: 0.8643 - val_loss: 0.3068 - val_accuracy: 0.9117 - lr: 0.0010
Epoch 54/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4488 - accuracy: 0.8646
Epoch 54: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4489 - accuracy: 0.8646 - val_loss: 0.3041 - val_accuracy: 0.9166 - lr: 0.0010
Epoch 55/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4470 - accuracy: 0.8650
Epoch 55: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4470 - accuracy: 0.8650 - val_loss: 0.3047 - val_accuracy: 0.9171 - lr: 0.0010
Epoch 56/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4476 - accuracy: 0.8653
Epoch 56: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4476 - accuracy: 0.8653 - val_loss: 0.3136 - val_accuracy: 0.9142 - lr: 0.0010
Epoch 57/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4471 - accuracy: 0.8653
Epoch 57: val_accuracy did not improve from 0.91817
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4473 - accuracy: 0.8653 - val_loss: 0.3153 - val_accuracy: 0.9121 - lr: 0.0010
Epoch 58/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4453 - accuracy: 0.8660
Epoch 58: val_accuracy improved from 0.91817 to 0.91883, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4453 - accuracy: 0.8660 - val_loss: 0.2956 - val_accuracy: 0.9188 - lr: 0.0010
Epoch 59/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4452 - accuracy: 0.8659
Epoch 59: val_accuracy did not improve from 0.91883
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4451 - accuracy: 0.8659 - val_loss: 0.2980 - val_accuracy: 0.9174 - lr: 0.0010
Epoch 60/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4466 - accuracy: 0.8651
Epoch 60: val_accuracy did not improve from 0.91883
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4466 - accuracy: 0.8651 - val_loss: 0.3072 - val_accuracy: 0.9162 - lr: 0.0010
Epoch 61/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4456 - accuracy: 0.8649
Epoch 61: val_accuracy did not improve from 0.91883
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4455 - accuracy: 0.8650 - val_loss: 0.3093 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 62/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4425 - accuracy: 0.8679
Epoch 62: val_accuracy did not improve from 0.91883
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4425 - accuracy: 0.8679 - val_loss: 0.3041 - val_accuracy: 0.9125 - lr: 0.0010
Epoch 63/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4442 - accuracy: 0.8665
Epoch 63: val_accuracy did not improve from 0.91883
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4442 - accuracy: 0.8665 - val_loss: 0.3040 - val_accuracy: 0.9143 - lr: 0.0010
Epoch 64/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4413 - accuracy: 0.8666
Epoch 64: val_accuracy did not improve from 0.91883
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4413 - accuracy: 0.8666 - val_loss: 0.3010 - val_accuracy: 0.9162 - lr: 0.0010
Epoch 65/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4440 - accuracy: 0.8658
Epoch 65: val_accuracy improved from 0.91883 to 0.91917, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4440 - accuracy: 0.8658 - val_loss: 0.2979 - val_accuracy: 0.9192 - lr: 0.0010
Epoch 66/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4425 - accuracy: 0.8674
Epoch 66: val_accuracy did not improve from 0.91917
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4423 - accuracy: 0.8675 - val_loss: 0.3110 - val_accuracy: 0.9138 - lr: 0.0010
Epoch 67/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4423 - accuracy: 0.8674
Epoch 67: val_accuracy did not improve from 0.91917
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4421 - accuracy: 0.8674 - val_loss: 0.3062 - val_accuracy: 0.9134 - lr: 0.0010
Epoch 68/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4394 - accuracy: 0.8687
Epoch 68: val_accuracy did not improve from 0.91917
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4394 - accuracy: 0.8687 - val_loss: 0.2944 - val_accuracy: 0.9175 - lr: 0.0010
Epoch 69/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4401 - accuracy: 0.8668
Epoch 69: val_accuracy improved from 0.91917 to 0.92092, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4400 - accuracy: 0.8668 - val_loss: 0.2894 - val_accuracy: 0.9209 - lr: 0.0010
Epoch 70/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4411 - accuracy: 0.8678
Epoch 70: val_accuracy improved from 0.92092 to 0.92133, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4411 - accuracy: 0.8678 - val_loss: 0.2970 - val_accuracy: 0.9213 - lr: 0.0010
Epoch 71/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4405 - accuracy: 0.8678
Epoch 71: val_accuracy did not improve from 0.92133
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4406 - accuracy: 0.8678 - val_loss: 0.2942 - val_accuracy: 0.9192 - lr: 0.0010
Epoch 72/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4386 - accuracy: 0.8681
Epoch 72: val_accuracy did not improve from 0.92133
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4387 - accuracy: 0.8680 - val_loss: 0.3026 - val_accuracy: 0.9153 - lr: 0.0010
Epoch 73/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4367 - accuracy: 0.8692
Epoch 73: val_accuracy did not improve from 0.92133
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4366 - accuracy: 0.8692 - val_loss: 0.2935 - val_accuracy: 0.9188 - lr: 0.0010
Epoch 74/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4393 - accuracy: 0.8682
Epoch 74: val_accuracy did not improve from 0.92133
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4393 - accuracy: 0.8682 - val_loss: 0.3057 - val_accuracy: 0.9153 - lr: 0.0010
Epoch 75/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4379 - accuracy: 0.8692
Epoch 75: val_accuracy improved from 0.92133 to 0.92467, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4380 - accuracy: 0.8692 - val_loss: 0.2870 - val_accuracy: 0.9247 - lr: 0.0010
Epoch 76/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4409 - accuracy: 0.8676
Epoch 76: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4409 - accuracy: 0.8677 - val_loss: 0.3123 - val_accuracy: 0.9173 - lr: 0.0010
Epoch 77/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4382 - accuracy: 0.8682
Epoch 77: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4382 - accuracy: 0.8682 - val_loss: 0.2987 - val_accuracy: 0.9180 - lr: 0.0010
Epoch 78/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4368 - accuracy: 0.8698
Epoch 78: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4369 - accuracy: 0.8697 - val_loss: 0.3041 - val_accuracy: 0.9190 - lr: 0.0010
Epoch 79/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4364 - accuracy: 0.8691
Epoch 79: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4363 - accuracy: 0.8691 - val_loss: 0.3015 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 80/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4349 - accuracy: 0.8691
Epoch 80: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4350 - accuracy: 0.8691 - val_loss: 0.2949 - val_accuracy: 0.9201 - lr: 0.0010
Epoch 81/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4344 - accuracy: 0.8693
Epoch 81: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4344 - accuracy: 0.8693 - val_loss: 0.2979 - val_accuracy: 0.9208 - lr: 0.0010
Epoch 82/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4347 - accuracy: 0.8697
Epoch 82: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4349 - accuracy: 0.8696 - val_loss: 0.2971 - val_accuracy: 0.9191 - lr: 0.0010
Epoch 83/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4338 - accuracy: 0.8696
Epoch 83: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4338 - accuracy: 0.8696 - val_loss: 0.2977 - val_accuracy: 0.9197 - lr: 0.0010
Epoch 84/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4308 - accuracy: 0.8711
Epoch 84: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4308 - accuracy: 0.8710 - val_loss: 0.3024 - val_accuracy: 0.9158 - lr: 0.0010
Epoch 85/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4364 - accuracy: 0.8699
Epoch 85: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4364 - accuracy: 0.8699 - val_loss: 0.2934 - val_accuracy: 0.9185 - lr: 0.0010
Epoch 86/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4317 - accuracy: 0.8706
Epoch 86: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4317 - accuracy: 0.8707 - val_loss: 0.2997 - val_accuracy: 0.9152 - lr: 0.0010
Epoch 87/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4359 - accuracy: 0.8695
Epoch 87: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4359 - accuracy: 0.8695 - val_loss: 0.2923 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 88/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4335 - accuracy: 0.8694
Epoch 88: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4334 - accuracy: 0.8694 - val_loss: 0.3068 - val_accuracy: 0.9153 - lr: 0.0010
Epoch 89/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4352 - accuracy: 0.8694
Epoch 89: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4350 - accuracy: 0.8694 - val_loss: 0.2939 - val_accuracy: 0.9193 - lr: 0.0010
Epoch 90/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4322 - accuracy: 0.8719
Epoch 90: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4323 - accuracy: 0.8719 - val_loss: 0.3235 - val_accuracy: 0.9063 - lr: 0.0010
Epoch 91/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4295 - accuracy: 0.8704
Epoch 91: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4294 - accuracy: 0.8703 - val_loss: 0.2892 - val_accuracy: 0.9216 - lr: 0.0010
Epoch 92/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4339 - accuracy: 0.8702
Epoch 92: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4339 - accuracy: 0.8702 - val_loss: 0.2950 - val_accuracy: 0.9202 - lr: 0.0010
Epoch 93/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4312 - accuracy: 0.8705
Epoch 93: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4313 - accuracy: 0.8705 - val_loss: 0.2930 - val_accuracy: 0.9185 - lr: 0.0010
Epoch 94/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4297 - accuracy: 0.8714
Epoch 94: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4296 - accuracy: 0.8713 - val_loss: 0.3003 - val_accuracy: 0.9172 - lr: 0.0010
Epoch 95/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4303 - accuracy: 0.8715
Epoch 95: val_accuracy did not improve from 0.92467
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4303 - accuracy: 0.8715 - val_loss: 0.2897 - val_accuracy: 0.9205 - lr: 0.0010
Epoch 95: early stopping
Train: 0.917, Test: 0.914 313/313 [==============================] - 1s 2ms/step
import visualkeras
visualkeras.layered_view(saved_model)
pred = saved_model.predict(X_test)
pred = np.argmax(pred, axis=1)
classifation_matrix = confusion_matrix(y_test_label, pred)
# plot confusion matrix
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix, annot=True, fmt="d")
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
313/313 [==============================] - 1s 2ms/step
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
final_model2 = Sequential()
final_model2.add(Conv2D(128, kernel_size=(3, 3),activation='linear',kernel_initializer='he_normal',input_shape=(28,28,1)))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(MaxPooling2D((2, 2)))
final_model2.add(BatchNormalization())
final_model2.add(Dropout(0.25))
final_model2.add(Conv2D(64, (5, 5), activation='linear'))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(BatchNormalization())
final_model2.add(MaxPooling2D(pool_size=(3, 3)))
final_model2.add(Dropout(0.25))
final_model2.add(Conv2D(32, (3, 3), activation='linear'))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(BatchNormalization())
final_model2.add(Dropout(0.4))
final_model2.add(Flatten())
final_model2.add(Dense(128, activation='linear'))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(Dropout(0.3))
final_model2.add(Dense(64, activation='linear'))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1(0.01)))
final_model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
final_model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
mc = ModelCheckpoint('final_fanout_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = final_model.fit(X_train, y_train, epochs = 200,
validation_data=(X_val, y_val), callbacks=[early_stopping,mc,reduce_lr] , batch_size=64)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
fanout_model = load_model('final_fanout_model.h5')
# evaluate the model
_, train_acc = fanout_model.evaluate(X_train, y_train, verbose=0)
_, test_acc = fanout_model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
precision,recall,f1 = get_metrics(fanout_model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Fan out final model',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
visualkeras.layered_view(fanout_model)
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_6 (Conv2D) (None, 26, 26, 32) 320
leaky_re_lu_7 (LeakyReLU) (None, 26, 26, 32) 0
max_pooling2d_4 (MaxPooling (None, 13, 13, 32) 0
2D)
batch_normalization_6 (Batc (None, 13, 13, 32) 128
hNormalization)
dropout_8 (Dropout) (None, 13, 13, 32) 0
conv2d_7 (Conv2D) (None, 9, 9, 64) 51264
leaky_re_lu_8 (LeakyReLU) (None, 9, 9, 64) 0
batch_normalization_7 (Batc (None, 9, 9, 64) 256
hNormalization)
max_pooling2d_5 (MaxPooling (None, 3, 3, 64) 0
2D)
dropout_9 (Dropout) (None, 3, 3, 64) 0
conv2d_8 (Conv2D) (None, 1, 1, 128) 73856
leaky_re_lu_9 (LeakyReLU) (None, 1, 1, 128) 0
batch_normalization_8 (Batc (None, 1, 1, 128) 512
hNormalization)
dropout_10 (Dropout) (None, 1, 1, 128) 0
flatten_4 (Flatten) (None, 128) 0
dense_4 (Dense) (None, 128) 16512
leaky_re_lu_10 (LeakyReLU) (None, 128) 0
dropout_11 (Dropout) (None, 128) 0
flatten_5 (Flatten) (None, 128) 0
dense_5 (Dense) (None, 10) 1290
=================================================================
Total params: 144,138
Trainable params: 143,690
Non-trainable params: 448
_________________________________________________________________
Epoch 1/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4300 - accuracy: 0.8714
Epoch 1: val_accuracy improved from -inf to 0.91733, saving model to final_fanout_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4301 - accuracy: 0.8714 - val_loss: 0.3031 - val_accuracy: 0.9173 - lr: 0.0010
Epoch 2/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4324 - accuracy: 0.8706
Epoch 2: val_accuracy improved from 0.91733 to 0.92175, saving model to final_fanout_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4323 - accuracy: 0.8706 - val_loss: 0.2897 - val_accuracy: 0.9218 - lr: 0.0010
Epoch 3/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4309 - accuracy: 0.8703
Epoch 3: val_accuracy did not improve from 0.92175
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4310 - accuracy: 0.8703 - val_loss: 0.2949 - val_accuracy: 0.9195 - lr: 0.0010
Epoch 4/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4304 - accuracy: 0.8707
Epoch 4: val_accuracy did not improve from 0.92175
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4303 - accuracy: 0.8707 - val_loss: 0.2978 - val_accuracy: 0.9172 - lr: 0.0010
Epoch 5/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4314 - accuracy: 0.8702
Epoch 5: val_accuracy improved from 0.92175 to 0.92192, saving model to final_fanout_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4315 - accuracy: 0.8701 - val_loss: 0.2886 - val_accuracy: 0.9219 - lr: 0.0010
Epoch 6/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4306 - accuracy: 0.8713
Epoch 6: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4306 - accuracy: 0.8713 - val_loss: 0.3140 - val_accuracy: 0.9083 - lr: 0.0010
Epoch 7/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4290 - accuracy: 0.8714
Epoch 7: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4291 - accuracy: 0.8714 - val_loss: 0.2898 - val_accuracy: 0.9205 - lr: 0.0010
Epoch 8/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4283 - accuracy: 0.8719
Epoch 8: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4286 - accuracy: 0.8718 - val_loss: 0.2888 - val_accuracy: 0.9192 - lr: 0.0010
Epoch 9/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4292 - accuracy: 0.8721
Epoch 9: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4291 - accuracy: 0.8722 - val_loss: 0.2897 - val_accuracy: 0.9205 - lr: 0.0010
Epoch 10/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4313 - accuracy: 0.8703
Epoch 10: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4313 - accuracy: 0.8703 - val_loss: 0.3115 - val_accuracy: 0.9159 - lr: 0.0010
Epoch 11/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4286 - accuracy: 0.8716
Epoch 11: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4286 - accuracy: 0.8716 - val_loss: 0.2869 - val_accuracy: 0.9212 - lr: 0.0010
Epoch 12/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4258 - accuracy: 0.8729
Epoch 12: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4258 - accuracy: 0.8728 - val_loss: 0.3241 - val_accuracy: 0.9076 - lr: 0.0010
Epoch 13/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4291 - accuracy: 0.8720
Epoch 13: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 17s 7ms/step - loss: 0.4290 - accuracy: 0.8720 - val_loss: 0.2977 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 14/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4275 - accuracy: 0.8720
Epoch 14: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4276 - accuracy: 0.8720 - val_loss: 0.2986 - val_accuracy: 0.9166 - lr: 0.0010
Epoch 15/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4274 - accuracy: 0.8721
Epoch 15: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4274 - accuracy: 0.8721 - val_loss: 0.2964 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 16/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4292 - accuracy: 0.8710
Epoch 16: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4291 - accuracy: 0.8711 - val_loss: 0.3117 - val_accuracy: 0.9178 - lr: 0.0010
Epoch 17/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4264 - accuracy: 0.8719
Epoch 17: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4265 - accuracy: 0.8719 - val_loss: 0.2925 - val_accuracy: 0.9194 - lr: 0.0010
Epoch 18/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4260 - accuracy: 0.8725
Epoch 18: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4260 - accuracy: 0.8724 - val_loss: 0.2952 - val_accuracy: 0.9181 - lr: 0.0010
Epoch 19/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4243 - accuracy: 0.8728
Epoch 19: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4243 - accuracy: 0.8728 - val_loss: 0.2866 - val_accuracy: 0.9193 - lr: 0.0010
Epoch 20/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4263 - accuracy: 0.8729
Epoch 20: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4263 - accuracy: 0.8730 - val_loss: 0.2925 - val_accuracy: 0.9198 - lr: 0.0010
Epoch 21/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4235 - accuracy: 0.8743
Epoch 21: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4237 - accuracy: 0.8742 - val_loss: 0.2951 - val_accuracy: 0.9175 - lr: 0.0010
Epoch 22/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4274 - accuracy: 0.8724
Epoch 22: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4273 - accuracy: 0.8724 - val_loss: 0.2988 - val_accuracy: 0.9181 - lr: 0.0010
Epoch 23/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4258 - accuracy: 0.8728
Epoch 23: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4257 - accuracy: 0.8727 - val_loss: 0.2936 - val_accuracy: 0.9172 - lr: 0.0010
Epoch 24/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4235 - accuracy: 0.8733
Epoch 24: val_accuracy did not improve from 0.92192
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4235 - accuracy: 0.8733 - val_loss: 0.2904 - val_accuracy: 0.9218 - lr: 0.0010
Epoch 25/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4267 - accuracy: 0.8722
Epoch 25: val_accuracy improved from 0.92192 to 0.92317, saving model to final_fanout_model.h5
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4266 - accuracy: 0.8722 - val_loss: 0.2839 - val_accuracy: 0.9232 - lr: 0.0010
Epoch 26/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4236 - accuracy: 0.8729
Epoch 26: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4236 - accuracy: 0.8729 - val_loss: 0.2912 - val_accuracy: 0.9216 - lr: 0.0010
Epoch 27/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4272 - accuracy: 0.8728
Epoch 27: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4272 - accuracy: 0.8728 - val_loss: 0.2898 - val_accuracy: 0.9213 - lr: 0.0010
Epoch 28/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4243 - accuracy: 0.8730
Epoch 28: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4243 - accuracy: 0.8731 - val_loss: 0.2936 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 29/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4243 - accuracy: 0.8734
Epoch 29: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4244 - accuracy: 0.8734 - val_loss: 0.2947 - val_accuracy: 0.9190 - lr: 0.0010
Epoch 30/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4238 - accuracy: 0.8738
Epoch 30: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4238 - accuracy: 0.8738 - val_loss: 0.2856 - val_accuracy: 0.9221 - lr: 0.0010
Epoch 31/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4234 - accuracy: 0.8735
Epoch 31: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4234 - accuracy: 0.8735 - val_loss: 0.2901 - val_accuracy: 0.9211 - lr: 0.0010
Epoch 32/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4259 - accuracy: 0.8729
Epoch 32: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4259 - accuracy: 0.8729 - val_loss: 0.2884 - val_accuracy: 0.9215 - lr: 0.0010
Epoch 33/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4252 - accuracy: 0.8734
Epoch 33: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4253 - accuracy: 0.8734 - val_loss: 0.2857 - val_accuracy: 0.9207 - lr: 0.0010
Epoch 34/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4250 - accuracy: 0.8735
Epoch 34: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4250 - accuracy: 0.8735 - val_loss: 0.3186 - val_accuracy: 0.9153 - lr: 0.0010
Epoch 35/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4233 - accuracy: 0.8735
Epoch 35: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4234 - accuracy: 0.8735 - val_loss: 0.3104 - val_accuracy: 0.9164 - lr: 0.0010
Epoch 36/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4268 - accuracy: 0.8728
Epoch 36: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4267 - accuracy: 0.8728 - val_loss: 0.2926 - val_accuracy: 0.9227 - lr: 0.0010
Epoch 37/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4234 - accuracy: 0.8744
Epoch 37: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4235 - accuracy: 0.8743 - val_loss: 0.3038 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 38/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4247 - accuracy: 0.8730
Epoch 38: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4247 - accuracy: 0.8730 - val_loss: 0.2951 - val_accuracy: 0.9180 - lr: 0.0010
Epoch 39/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4249 - accuracy: 0.8729
Epoch 39: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4249 - accuracy: 0.8729 - val_loss: 0.2979 - val_accuracy: 0.9202 - lr: 0.0010
Epoch 40/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4210 - accuracy: 0.8744
Epoch 40: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4211 - accuracy: 0.8744 - val_loss: 0.2872 - val_accuracy: 0.9221 - lr: 0.0010
Epoch 41/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4229 - accuracy: 0.8734
Epoch 41: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4229 - accuracy: 0.8734 - val_loss: 0.2968 - val_accuracy: 0.9163 - lr: 0.0010
Epoch 42/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4215 - accuracy: 0.8737
Epoch 42: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4216 - accuracy: 0.8737 - val_loss: 0.3033 - val_accuracy: 0.9191 - lr: 0.0010
Epoch 43/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4242 - accuracy: 0.8730
Epoch 43: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4242 - accuracy: 0.8730 - val_loss: 0.3002 - val_accuracy: 0.9208 - lr: 0.0010
Epoch 44/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4218 - accuracy: 0.8749
Epoch 44: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4218 - accuracy: 0.8749 - val_loss: 0.2968 - val_accuracy: 0.9168 - lr: 0.0010
Epoch 45/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4211 - accuracy: 0.8741
Epoch 45: val_accuracy did not improve from 0.92317
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4211 - accuracy: 0.8741 - val_loss: 0.2905 - val_accuracy: 0.9228 - lr: 0.0010
Epoch 45: early stopping
Train: 0.923, Test: 0.917 313/313 [==============================] - 1s 2ms/step
# plot actual and predicted images for 10 random images that were misclassified
pred = fanout_model.predict(X_test)
# get images where predictions and actual labels don't match
incorrect = np.nonzero(pred.argmax(axis=1) != y_test_label)[0]
# select 10 random images from those where prediction and actual label don't match
incorrect = np.random.choice(incorrect, 10)
# plot 10 random images where prediction and actual label don't match
types = [ 'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
fig, ax = plt.subplots(2, 5, figsize=(15, 6))
for i, incorrect in enumerate(incorrect):
ax[i//5, i%5].imshow(X_test[incorrect].reshape(28, 28), cmap='gray', interpolation='none')
# title for each image will be actual and predicted label
ax[i//5, i%5].set_title("Predicted {}, Class {}".format(types[pred[incorrect].argmax()], types[y_test_label[incorrect]]))
ax[i//5, i%5].axis('off')
313/313 [==============================] - 1s 2ms/step
pred = fanout_model.predict(X_test)
pred = np.argmax(pred, axis=1)
classifation_matrix = confusion_matrix(y_test_label, pred)
# plot confusion matrix
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix, annot=True, fmt="d")
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
313/313 [==============================] - 1s 2ms/step
Model_scores
| Model | Accuracy | Loss | Precision | Recall | F1 Score | |
|---|---|---|---|---|---|---|
| 0 | Dense 1 layer NN | 0.8798 | 0.359683 | 0.880470 | 0.8798 | 0.878955 |
| 1 | Dense NN 2 layer | 0.8798 | 0.359683 | 0.867431 | 0.8660 | 0.863119 |
| 2 | Dense NN 3 layer | 0.8798 | 0.359683 | 0.876298 | 0.8727 | 0.872353 |
| 3 | CNN linear activation | 0.8798 | 0.359683 | 0.824820 | 0.8181 | 0.817135 |
| 4 | CNN linear max pool | 0.8714 | 0.751681 | 0.871716 | 0.8714 | 0.871052 |
| 5 | CNN linear avg pool | 0.8306 | 0.489766 | 0.834260 | 0.8306 | 0.831207 |
| 6 | CNN linear avg pool | 0.7676 | 0.636280 | 0.800908 | 0.7676 | 0.763595 |
| 7 | linear batchnorm high epochs w data aug | 0.7949 | 0.586244 | 0.807667 | 0.7949 | 0.794490 |
| 8 | linear batchnorm high epochs w data aug | 0.8304 | 0.494241 | 0.832257 | 0.8304 | 0.830593 |
| 9 | CNN relu adam w dropout | 0.8826 | 1.002725 | 0.883670 | 0.8826 | 0.883015 |
| 10 | Machine Learning Mastery model | 0.8826 | 1.002725 | 0.897519 | 0.8960 | 0.896498 |
| 11 | Machine Learning Mastery model | 0.8826 | 1.002725 | 0.898155 | 0.8952 | 0.895019 |
| 12 | Final model | 0.9137 | 1.002725 | 0.914589 | 0.9137 | 0.913904 |
| 13 | Fan out final model | 0.9168 | 1.002725 | 0.917947 | 0.9168 | 0.917047 |
Overall , I think it was a good learning experience as I managed to improve and beat a model created by Machine Learning Master. I hope to learn more about CNNs and other deep learning models in the future